danhtran2mind committed
Commit f56ede2 · verified · Parent: 041f0c4

Upload 68 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete set.
Files changed (50)
  1. .gitattributes +13 -0
  2. LICENSE +21 -0
  3. apps/gradio_app.py +187 -0
  4. apps/gradio_app/__init__.py +0 -0
  5. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png +3 -0
  6. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/config.json +12 -0
  7. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/yoga.jpg +0 -0
  8. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png +3 -0
  9. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/config.json +12 -0
  10. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg +3 -0
  11. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png +3 -0
  12. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/config.json +12 -0
  13. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg +3 -0
  14. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png +3 -0
  15. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/config.json +12 -0
  16. apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/man_and_sword.jpg +0 -0
  17. apps/gradio_app/examples.py +99 -0
  18. apps/gradio_app/inference.py +45 -0
  19. apps/gradio_app/project_info.py +37 -0
  20. apps/gradio_app/setup_scripts.py +59 -0
  21. apps/gradio_app/static/style.css +574 -0
  22. apps/old-gradio_app.py +177 -0
  23. assets/.gitkeep +0 -0
  24. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png +3 -0
  25. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/config.json +12 -0
  26. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/yoga.jpg +0 -0
  27. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png +3 -0
  28. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/config.json +12 -0
  29. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg +3 -0
  30. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png +3 -0
  31. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/config.json +12 -0
  32. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg +3 -0
  33. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png +3 -0
  34. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/config.json +12 -0
  35. assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/man_and_sword.jpg +0 -0
  36. ckpts/.gitignore +2 -0
  37. configs/.gitkeep +0 -0
  38. configs/datasets_info.yaml +3 -0
  39. configs/model_ckpts.yaml +16 -0
  40. data/.gitignore +2 -0
  41. docs/inference/inference_doc.md +176 -0
  42. docs/scripts/download_ckpts_doc.md +29 -0
  43. docs/scripts/download_datasets_doc.md +20 -0
  44. docs/training/training_doc.md +106 -0
  45. notebooks/SD-2.1-Openpose-ControlNet.ipynb +0 -0
  46. requirements/requirements.txt +7 -0
  47. requirements/requirements_compatible.txt +7 -0
  48. scripts/download_ckpts.py +58 -0
  49. scripts/download_datasets.py +48 -0
  50. scripts/setup_third_party.py +38 -0
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg filter=lfs diff=lfs merge=lfs -text
+ apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg filter=lfs diff=lfs merge=lfs -text
+ assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png filter=lfs diff=lfs merge=lfs -text
+ tests/test_data/a_man_is_doing_yoga_in_a_serene_park_0.png filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Danh Tran
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
apps/gradio_app.py ADDED
@@ -0,0 +1,187 @@
+ import os
+ import subprocess
+ import gradio as gr
+ import random
+ from gradio_app.inference import run_inference
+ from gradio_app.examples import load_examples, select_example
+ from gradio_app.project_info import (
+     NAME,
+     CONTENT_DESCRIPTION,
+     CONTENT_IN_1,
+     CONTENT_OUT_1
+ )
+
+ def run_setup_script():
+     setup_script = os.path.join(os.path.dirname(__file__), "gradio_app", "setup_scripts.py")
+     try:
+         result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
+         return result.stdout
+     except subprocess.CalledProcessError as e:
+         print(f"Setup script failed with error: {e.stderr}")
+         return f"Setup script failed: {e.stderr}"
+
+ def stop_app():
+     """Function to stop the Gradio app."""
+     try:
+         gr.close_all()  # Close all running Gradio interfaces
+         return "Application stopped successfully."
+     except Exception as e:
+         return f"Error stopping application: {str(e)}"
+
+ def create_gui():
+     try:
+         custom_css = open("apps/gradio_app/static/style.css").read()
+     except FileNotFoundError:
+         print("Error: style.css not found at apps/gradio_app/static/style.css")
+         custom_css = ""  # Fallback to empty CSS if file is missing
+
+     with gr.Blocks(css=custom_css) as demo:
+         gr.Markdown(NAME)
+         gr.HTML(CONTENT_DESCRIPTION)
+         gr.HTML(CONTENT_IN_1)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 input_image = gr.Image(type="filepath", label="Input Image")
+                 prompt = gr.Textbox(
+                     label="Prompt",
+                     value="a man is doing yoga"
+                 )
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt",
+                     value="monochrome, lowres, bad anatomy, worst quality, low quality"
+                 )
+
+                 with gr.Row():
+                     width = gr.Slider(
+                         minimum=256,
+                         maximum=1024,
+                         value=512,
+                         step=64,
+                         label="Width"
+                     )
+                     height = gr.Slider(
+                         minimum=256,
+                         maximum=1024,
+                         value=512,
+                         step=64,
+                         label="Height"
+                     )
+
+                 with gr.Accordion("Advanced Settings", open=False):
+                     num_steps = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         value=30,
+                         step=1,
+                         label="Number of Inference Steps"
+                     )
+                     use_random_seed = gr.Checkbox(label="Use Random Seed", value=False)
+                     seed = gr.Slider(
+                         minimum=0,
+                         maximum=2**32 - 1,
+                         value=42,
+                         step=1,
+                         label="Random Seed",
+                         visible=True
+                     )
+
+                     guidance_scale = gr.Slider(
+                         minimum=1.0,
+                         maximum=20.0,
+                         value=7.5,
+                         step=0.1,
+                         label="Guidance Scale"
+                     )
+                     controlnet_conditioning_scale = gr.Slider(
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=1.0,
+                         step=0.1,
+                         label="ControlNet Conditioning Scale"
+                     )
+
+             with gr.Column(scale=3):
+                 output_images = gr.Image(label="Generated Images")
+                 output_message = gr.Textbox(label="Status")
+
+         submit_button = gr.Button("Generate Images", elem_classes="submit-btn")
+         stop_button = gr.Button("Stop Application", elem_classes="stop-btn")
+
+         def update_seed_visibility(use_random):
+             return gr.update(visible=not use_random)
+
+         use_random_seed.change(
+             fn=update_seed_visibility,
+             inputs=use_random_seed,
+             outputs=seed
+         )
+
+         # Load examples
+         examples_data = load_examples(os.path.join("apps", "gradio_app",
+                                                    "assets", "examples", "Stable-Diffusion-2.1-Openpose-ControlNet"))
+         examples_component = gr.Examples(
+             examples=examples_data,
+             inputs=[
+                 input_image,
+                 prompt,
+                 negative_prompt,
+                 output_images,
+                 num_steps,
+                 seed,
+                 width,
+                 height,
+                 guidance_scale,
+                 controlnet_conditioning_scale,
+                 use_random_seed
+             ],
+             outputs=[
+                 input_image,
+                 prompt,
+                 negative_prompt,
+                 output_images,
+                 num_steps,
+                 seed,
+                 width,
+                 height,
+                 guidance_scale,
+                 controlnet_conditioning_scale,
+                 use_random_seed,
+                 output_message
+             ],
+             fn=select_example,
+             cache_examples=False,
+             label="Examples: Yoga Poses"
+         )
+
+         submit_button.click(
+             fn=run_inference,
+             inputs=[
+                 input_image,
+                 prompt,
+                 negative_prompt,
+                 num_steps,
+                 seed,
+                 width,
+                 height,
+                 guidance_scale,
+                 controlnet_conditioning_scale,
+                 use_random_seed,
+             ],
+             outputs=[output_images, output_message]
+         )
+
+         stop_button.click(
+             fn=stop_app,
+             inputs=[],
+             outputs=[output_message]
+         )
+
+         gr.HTML(CONTENT_OUT_1)
+
+     return demo
+
+ if __name__ == "__main__":
+     run_setup_script()
+     demo = create_gui()
+     demo.launch(share=True)
apps/gradio_app/__init__.py ADDED
File without changes
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png ADDED

Git LFS Details

  • SHA256: 3dc2b7efb61afd2d6ceda1b32ec9792a5b07f3ac3d7a96d7acdd2102ddb957b7
  • Pointer size: 131 Bytes
  • Size of remote file: 367 kB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "yoga.jpg",
+     "output_image": "a_man_is_doing_yoga_in_a_serene_park_0.png",
+     "prompt": "A man is doing yoga in a serene park.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 50,
+     "seed": 100,
+     "width": 512,
+     "height": 512,
+     "guidance_scale": 5.5,
+     "controlnet_conditioning_scale": 0.6
+ }
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/yoga.jpg ADDED
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png ADDED

Git LFS Details

  • SHA256: 2e83cc3b007c2303e276b3ac60a8fa930877e584e3534f12e1441ec83ed9e9fd
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "ride_bike.jpg",
+     "output_image": "a_man_is_galloping_on_a_horse_0.png",
+     "prompt": "A man is galloping on a horse.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 100,
+     "seed": 56,
+     "width": 1080,
+     "height": 720,
+     "guidance_scale": 9.5,
+     "controlnet_conditioning_scale": 0.5
+ }
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg ADDED

Git LFS Details

  • SHA256: 76310cad16fcf71097c9660d46a95ced0992d48bd92469e83fd25ee59f015998
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png ADDED

Git LFS Details

  • SHA256: a048958e0ed28806ecb7c9834f91b07a464b73cd641fa19b03f39ff542986530
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "tennis.jpg",
+     "output_image": "a_woman_is_holding_a_baseball_bat_in_her_hand_0.png",
+     "prompt": "A woman is holding a baseball bat in her hand.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 100,
+     "seed": 765,
+     "width": 990,
+     "height": 720,
+     "guidance_scale": 6.5,
+     "controlnet_conditioning_scale": 0.7
+ }
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg ADDED

Git LFS Details

  • SHA256: 259845edb5c365bccb33f9207630d829bb5a839e72bf7d0326f11ae4862694fa
  • Pointer size: 132 Bytes
  • Size of remote file: 5.61 MB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png ADDED

Git LFS Details

  • SHA256: deaa70aba05ab58ea0f9bd16512c6dcc7e0951559037779063045b7c035342f8
  • Pointer size: 131 Bytes
  • Size of remote file: 441 kB
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "man_and_sword.jpg",
+     "output_image": "a_woman_raises_a_katana_0.png",
+     "prompt": "A woman raises a katana.",
+     "negative_prompt": "body elongated, fragmentation, many hands, ugly, deformed face",
+     "num_steps": 50,
+     "seed": 78,
+     "width": 540,
+     "height": 512,
+     "guidance_scale": 6.5,
+     "controlnet_conditioning_scale": 0.8
+ }
apps/gradio_app/assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/man_and_sword.jpg ADDED
apps/gradio_app/examples.py ADDED
@@ -0,0 +1,99 @@
+ import os
+ import json
+ from PIL import Image
+ import gradio as gr
+
+ def load_examples(examples_base_path=os.path.join("apps", "gradio_app",
+                                                   "assets", "examples", "Stable-Diffusion-2.1-Openpose-ControlNet")):
+
+     """Load example configurations and input images from the Stable-Diffusion-2.1-Openpose-ControlNet directory."""
+     examples = []
+
+     # Iterate through example folders (e.g., '1', '2', '3', '4')
+     for folder in os.listdir(examples_base_path):
+         folder_path = os.path.join(examples_base_path, folder)
+         config_path = os.path.join(folder_path, "config.json")
+
+         if os.path.exists(config_path):
+             try:
+                 with open(config_path, 'r') as f:
+                     config = json.load(f)
+
+                 # Extract configuration fields
+                 input_filename = config["input_image"]
+                 output_filename = config["output_image"]
+                 prompt = config.get("prompt", "a man is doing yoga")
+                 negative_prompt = config.get("negative_prompt", "monochrome, lowres, bad anatomy, worst quality, low quality")
+                 num_steps = config.get("num_steps", 30)
+                 seed = config.get("seed", 42)
+                 width = config.get("width", 512)
+                 height = config.get("height", 512)
+                 guidance_scale = config.get("guidance_scale", 7.5)
+                 controlnet_conditioning_scale = config.get("controlnet_conditioning_scale", 1.0)
+
+                 # Construct absolute path for input image
+                 input_image_path = os.path.join(folder_path, input_filename)
+                 output_image_path = os.path.join(folder_path, output_filename)
+                 # Check if input image exists
+                 if os.path.exists(input_image_path):
+                     input_image_data = Image.open(input_image_path)
+                     output_image_data = Image.open(output_image_path)
+                     # Append example data in the order expected by Gradio inputs
+                     examples.append([
+                         input_image_data,  # Input image
+                         prompt,
+                         negative_prompt,
+                         output_image_data,
+                         num_steps,
+                         seed,
+                         width,
+                         height,
+                         guidance_scale,
+                         controlnet_conditioning_scale,
+                         False  # use_random_seed, hardcoded as per original gr.Examples
+                     ])
+                 else:
+                     print(f"Input image not found at {input_image_path}")
+
+             except json.JSONDecodeError as e:
+                 print(f"Error decoding JSON from {config_path}: {str(e)}")
+             except Exception as e:
+                 print(f"Error processing example in {folder_path}: {str(e)}")
+
+     return examples
+
+ def select_example(evt: gr.SelectData, examples_data):
+     """Handle selection of an example to populate Gradio inputs."""
+     example_index = evt.index
+     # Extract example data
+     # input_image_data, prompt, negative_prompt, output_image_data, num_steps, seed, width, height, guidance_scale, controlnet_conditioning_scale, use_random_seed = examples_data[example_index]
+     (
+         input_image_data,
+         prompt,
+         negative_prompt,
+         output_image_data,
+         num_steps,
+         seed,
+         width,
+         height,
+         guidance_scale,
+         controlnet_conditioning_scale,
+         use_random_seed,
+     ) = examples_data[example_index]
+
+
+     # Return values to update Gradio interface inputs and output message
+     return (
+         input_image_data,  # Input image
+         prompt,  # Prompt
+         negative_prompt,  # Negative prompt
+         output_image_data,  # Output image
+         num_steps,  # Number of inference steps
+         seed,  # Random seed
+         width,  # Width
+         height,  # Height
+         guidance_scale,  # Guidance scale
+         controlnet_conditioning_scale,  # ControlNet conditioning scale
+         use_random_seed,  # Use random seed
+         f"Loaded example {example_index + 1} with prompt: {prompt}"  # Output message
+     )
apps/gradio_app/inference.py ADDED
@@ -0,0 +1,45 @@
+ import random
+ import os
+ import sys
+
+ # Add the project root directory to the Python path
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
+
+ from src.controlnet_image_generator.infer import infer
+
+
+ def run_inference(
+     input_image,
+     prompt,
+     negative_prompt,
+     num_steps,
+     seed,
+     width,
+     height,
+     guidance_scale,
+     controlnet_conditioning_scale,
+     use_random_seed=False,
+ ):
+     config_path = "configs/model_ckpts.yaml"
+
+     if use_random_seed:
+         seed = random.randint(0, 2 ** 32)
+
+     try:
+         result = infer(
+             config_path=config_path,
+             input_image=input_image,
+             image_url=None,
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             num_steps=num_steps,
+             seed=seed,
+             width=width,
+             height=height,
+             guidance_scale=guidance_scale,
+             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
+         )
+         result = list(result)[0]
+         return result, "Inference completed successfully"
+     except Exception as e:
+         return [], f"Error during inference: {str(e)}"
apps/gradio_app/project_info.py ADDED
@@ -0,0 +1,37 @@
+ NAME = """
+ # ControlNet Image Generator 🖌️
+ """.strip()
+
+ CONTENT_DESCRIPTION = """
+ <h3>ControlNet ⚡️ boosts Stable Diffusion with sharp, innovative image generation control 🖌️</h3>
+ """.strip()
+
+ # CONTENT_IN_1 = """
+ # Transforms low-res anime images into sharp, vibrant HD visuals, enhancing textures and details for artwork and games.
+ # """.strip()
+
+ CONTENT_IN_1 = """
+ <p class="source">
+     For more information, you can check out my GitHub repository and HuggingFace Model Hub:<br>
+     Source code:
+     <a class="badge" href="https://github.com/danhtran2mind/CoantrolNet-Image-Generator">
+         <img src="https://img.shields.io/badge/GitHub-danhtran2mind%2FControlNet--Image--Generator-blue?style=flat&logo=github" alt="GitHub Repo">
+     </a>,
+     Model Hub:
+     <a class="badge" href="https://huggingface.co/danhtran2mind/Stable-Diffusion-2.1-Openpose-ControlNet">
+         <img src="https://img.shields.io/badge/HuggingFace-danhtran2mind%2FStable--Diffusion--2.1--Openpose--ControlNet-yellow?style=flat&logo=huggingface" alt="HuggingFace Model">
+     </a>.
+ </p>
+ """.strip()
+
+ CONTENT_OUT_1 = """
+ <div class="quote-container">
+     <p>
+         This project is built using code from
+         <a class="badge" href="https://github.com/huggingface/diffusers">
+             <img src="https://img.shields.io/badge/Built%20on-huggingface%2Fdiffusers-blue?style=flat&logo=github" alt="Built on huggingface/diffusers">
+         </a>.
+     </p>
+ </div>
+ """.strip()
+
apps/gradio_app/setup_scripts.py ADDED
@@ -0,0 +1,59 @@
+ import subprocess
+ import sys
+ import os
+
+ def run_script(script_path, args=None):
+     """
+     Run a Python script using subprocess with optional arguments and handle errors.
+     Returns True if successful, False otherwise.
+     """
+     if not os.path.isfile(script_path):
+         print(f"Script not found: {script_path}")
+         return False
+
+     try:
+         command = [sys.executable, script_path]
+         if args:
+             command.extend(args)
+         result = subprocess.run(
+             command,
+             check=True,
+             text=True,
+             capture_output=True
+         )
+         print(f"Successfully executed {script_path}")
+         print(result.stdout)
+         return True
+     except subprocess.CalledProcessError as e:
+         print(f"Error executing {script_path}:")
+         print(e.stderr)
+         return False
+     except Exception as e:
+         print(f"Unexpected error executing {script_path}: {str(e)}")
+         return False
+
+ def main():
+     """
+     Main function to execute download_ckpts.py with proper error handling.
+     """
+     scripts_dir = "scripts"
+     scripts = [
+         {
+             "path": os.path.join(scripts_dir, "download_ckpts.py"),
+             "args": []  # Empty list for args to avoid NoneType issues
+         }
+     ]
+
+     for script in scripts:
+         script_path = script["path"]
+         args = script.get("args", [])  # Safely get args with default empty list
+         print(f"Starting execution of {script_path}{' with args: ' + ' '.join(args) if args else ''}\n")
+
+         if not run_script(script_path, args):
+             print(f"Stopping execution due to error in {script_path}")
+             sys.exit(1)
+
+         print(f"Completed execution of {script_path}\n")
+
+ if __name__ == "__main__":
+     main()
apps/gradio_app/static/style.css ADDED
@@ -0,0 +1,574 @@
+ /* @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap'); */
+ /* ─── palette ───────────────────────────────────────────── */
+ body, .gradio-container {
+     font-family: 'Inter', sans-serif;
+     background: #FFFBF7;
+     color: #0F172A;
+ }
+ a {
+     color: #F97316;
+     text-decoration: none;
+     font-weight: 600;
+ }
+ a:hover { color: #C2410C; }
+ /* ─── headline ──────────────────────────────────────────── */
+ #titlebar {
+     text-align: center;
+     margin-top: 2.4rem;
+     margin-bottom: .9rem;
+ }
+ /* ─── card look ─────────────────────────────────────────── */
+ .gr-block,
+ .gr-box,
+ .gr-row,
+ #cite-wrapper {
+     border: 1px solid #F8C89B;
+     border-radius: 10px;
+     background: #fff;
+     box-shadow: 0 3px 6px rgba(0, 0, 0, .05);
+ }
+ .gr-gallery-item { background: #fff; }
+ /* ─── controls / inputs ─────────────────────────────────── */
+ .gr-button-primary,
+ #copy-btn {
+     background: linear-gradient(90deg, #F97316 0%, #C2410C 100%);
+     border: none;
+     color: #fff;
+     border-radius: 6px;
+     font-weight: 600;
+     transition: transform .12s ease, box-shadow .12s ease;
+ }
+ .gr-button-primary:hover,
+ #copy-btn:hover {
+     transform: translateY(-2px);
+     box-shadow: 0 4px 12px rgba(249, 115, 22, .35);
+ }
+ .gr-dropdown input {
+     border: 1px solid #F9731699;
+ }
+ .preview img,
+ .preview canvas { object-fit: contain !important; }
+ /* ─── hero section ─────────────────────────────────────── */
+ #hero-wrapper { text-align: center; }
+ #hero-badge {
+     display: inline-block;
+     padding: .85rem 1.2rem;
+     border-radius: 8px;
+     background: #FFEAD2;
+     border: 1px solid #F9731655;
+     font-size: .95rem;
+     font-weight: 600;
+     margin-bottom: .5rem;
+ }
+ #hero-links {
+     font-size: .95rem;
+     font-weight: 600;
+     margin-bottom: 1.6rem;
+ }
+ #hero-links img {
+     height: 22px;
+     vertical-align: middle;
+     margin-left: .55rem;
+ }
+ /* ─── score area ───────────────────────────────────────── */
+ #score-area {
+     text-align: center;
+ }
+ .title-container {
+     display: flex;
+     align-items: center;
+     gap: 12px;
+     justify-content: center;
+     margin-bottom: 10px;
+     text-align: center;
+ }
+ .match-badge {
+     display: inline-block;
+     padding: .35rem .9rem;
+     border-radius: 9999px;
+     font-weight: 600;
+     font-size: 1.25rem;
+ }
+ /* ─── citation card ────────────────────────────────────── */
+ #cite-wrapper {
+     position: relative;
+     padding: .9rem 1rem;
+     margin-top: 2rem;
+ }
+ #cite-wrapper code {
+     font-family: SFMono-Regular, Consolas, monospace;
+     font-size: .84rem;
+     white-space: pre-wrap;
+     color: #0F172A;
+ }
+ #copy-btn {
+     position: absolute;
+     top: .55rem;
+     right: .6rem;
+     padding: .18rem .7rem;
+     font-size: .72rem;
+     line-height: 1;
+ }
+ /* ─── dark mode ────────────────────────────────────── */
+ .dark body,
+ .dark .gradio-container {
+     background-color: #332a22;
+     color: #e5e7eb;
+ }
+ .dark .gr-block,
+ .dark .gr-box,
+ .dark .gr-row {
+     background-color: #332a22;
+     border: 1px solid #4b5563;
+ }
+ .dark .gr-dropdown input {
+     background-color: #332a22;
+     color: #f1f5f9;
+     border: 1px solid #F97316aa;
+ }
+ .dark #hero-badge {
+     background: #334155;
+     border: 1px solid #F9731655;
+     color: #fefefe;
+ }
+ .dark #cite-wrapper {
+     background-color: #473f38;
+ }
+ .dark #bibtex {
+     color: #f8fafc !important;
+ }
+ .dark .card {
+     background-color: #473f38;
+ }
+ /* ─── switch logo for light/dark theme ─────────────── */
+ .logo-dark { display: none; }
+ .dark .logo-light { display: none; }
+ .dark .logo-dark { display: inline; }
+
+ /* https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap */
+
+ /* cyrillic-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2JL7SUc.woff2) format('woff2');
+     unicode-range: U+0460-052F, U+1C80-1C8A, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
+ }
+ /* cyrillic */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa0ZL7SUc.woff2) format('woff2');
+     unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
+ }
+ /* greek-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2ZL7SUc.woff2) format('woff2');
+     unicode-range: U+1F00-1FFF;
+ }
+ /* greek */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1pL7SUc.woff2) format('woff2');
+     unicode-range: U+0370-0377, U+037A-037F, U+0384-038A, U+038C, U+038E-03A1, U+03A3-03FF;
+ }
+ /* vietnamese */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2pL7SUc.woff2) format('woff2');
+     unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, U+0323, U+0329, U+1EA0-1EF9, U+20AB;
+ }
+ /* latin-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa25L7SUc.woff2) format('woff2');
+     unicode-range: U+0100-02BA, U+02BD-02C5, U+02C7-02CC, U+02CE-02D7, U+02DD-02FF, U+0304, U+0308, U+0329, U+1D00-1DBF, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20C0, U+2113, U+2C60-2C7F, U+A720-A7FF;
+ }
+ /* latin */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 400;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1ZL7.woff2) format('woff2');
+     unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
+ }
+ /* cyrillic-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2JL7SUc.woff2) format('woff2');
+     unicode-range: U+0460-052F, U+1C80-1C8A, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
+ }
+ /* cyrillic */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa0ZL7SUc.woff2) format('woff2');
+     unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
+ }
+ /* greek-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2ZL7SUc.woff2) format('woff2');
+     unicode-range: U+1F00-1FFF;
+ }
+ /* greek */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1pL7SUc.woff2) format('woff2');
+     unicode-range: U+0370-0377, U+037A-037F, U+0384-038A, U+038C, U+038E-03A1, U+03A3-03FF;
+ }
+ /* vietnamese */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2pL7SUc.woff2) format('woff2');
+     unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, U+0323, U+0329, U+1EA0-1EF9, U+20AB;
+ }
+ /* latin-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa25L7SUc.woff2) format('woff2');
+     unicode-range: U+0100-02BA, U+02BD-02C5, U+02C7-02CC, U+02CE-02D7, U+02DD-02FF, U+0304, U+0308, U+0329, U+1D00-1DBF, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20C0, U+2113, U+2C60-2C7F, U+A720-A7FF;
+ }
+ /* latin */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 500;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1ZL7.woff2) format('woff2');
+     unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
+ }
+ /* cyrillic-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2JL7SUc.woff2) format('woff2');
+     unicode-range: U+0460-052F, U+1C80-1C8A, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
+ }
+ /* cyrillic */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa0ZL7SUc.woff2) format('woff2');
+     unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
+ }
+ /* greek-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2ZL7SUc.woff2) format('woff2');
+     unicode-range: U+1F00-1FFF;
+ }
+ /* greek */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1pL7SUc.woff2) format('woff2');
+     unicode-range: U+0370-0377, U+037A-037F, U+0384-038A, U+038C, U+038E-03A1, U+03A3-03FF;
+ }
+ /* vietnamese */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2pL7SUc.woff2) format('woff2');
+     unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, U+0323, U+0329, U+1EA0-1EF9, U+20AB;
+ }
+ /* latin-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa25L7SUc.woff2) format('woff2');
+     unicode-range: U+0100-02BA, U+02BD-02C5, U+02C7-02CC, U+02CE-02D7, U+02DD-02FF, U+0304, U+0308, U+0329, U+1D00-1DBF, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20C0, U+2113, U+2C60-2C7F, U+A720-A7FF;
+ }
+ /* latin */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 600;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1ZL7.woff2) format('woff2');
+     unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
+ }
+ /* cyrillic-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2JL7SUc.woff2) format('woff2');
+     unicode-range: U+0460-052F, U+1C80-1C8A, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
+ }
+ /* cyrillic */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa0ZL7SUc.woff2) format('woff2');
+     unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
+ }
+ /* greek-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2ZL7SUc.woff2) format('woff2');
+     unicode-range: U+1F00-1FFF;
+ }
+ /* greek */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1pL7SUc.woff2) format('woff2');
+     unicode-range: U+0370-0377, U+037A-037F, U+0384-038A, U+038C, U+038E-03A1, U+03A3-03FF;
+ }
+ /* vietnamese */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2pL7SUc.woff2) format('woff2');
+     unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, U+0323, U+0329, U+1EA0-1EF9, U+20AB;
+ }
+ /* latin-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa25L7SUc.woff2) format('woff2');
+     unicode-range: U+0100-02BA, U+02BD-02C5, U+02C7-02CC, U+02CE-02D7, U+02DD-02FF, U+0304, U+0308, U+0329, U+1D00-1DBF, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20C0, U+2113, U+2C60-2C7F, U+A720-A7FF;
+ }
+ /* latin */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 700;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1ZL7.woff2) format('woff2');
+     unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
+ }
+ /* cyrillic-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2JL7SUc.woff2) format('woff2');
+     unicode-range: U+0460-052F, U+1C80-1C8A, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
+ }
+ /* cyrillic */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa0ZL7SUc.woff2) format('woff2');
+     unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
+ }
+ /* greek-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2ZL7SUc.woff2) format('woff2');
+     unicode-range: U+1F00-1FFF;
+ }
+ /* greek */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1pL7SUc.woff2) format('woff2');
+     unicode-range: U+0370-0377, U+037A-037F, U+0384-038A, U+038C, U+038E-03A1, U+03A3-03FF;
+ }
+ /* vietnamese */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa2pL7SUc.woff2) format('woff2');
+     unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, U+0323, U+0329, U+1EA0-1EF9, U+20AB;
+ }
+ /* latin-ext */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa25L7SUc.woff2) format('woff2');
+     unicode-range: U+0100-02BA, U+02BD-02C5, U+02C7-02CC, U+02CE-02D7, U+02DD-02FF, U+0304, U+0308, U+0329, U+1D00-1DBF, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20C0, U+2113, U+2C60-2C7F, U+A720-A7FF;
+ }
+ /* latin */
+ @font-face {
+     font-family: 'Inter';
+     font-style: normal;
+     font-weight: 800;
+     font-display: swap;
+     src: url(https://fonts.gstatic.com/s/inter/v19/UcC73FwrK3iLTeHuS_nVMrMxCp50SjIa1ZL7.woff2) format('woff2');
+     unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
+ }
+
+ /* title_css */
+ #title {
+     font-size: 2.6rem;
+     font-weight: 800;
+     margin: 0;
+     line-height: 1.25;
+     color: #0F172A;
+ }
+ /* brand class is passed in title parameter */
+ #title .brand {
+     background: linear-gradient(90deg, #F97316 0%, #C2410C 90%);
+     -webkit-background-clip: text;
+     color: transparent;
+ }
+ .dark #title {
+     color: #f8fafc;
+ }
+ .title-container {
+     display: flex;
+     align-items: center;
+     gap: 12px;
+     justify-content: center;
+     margin-bottom: 10px;
+     text-align: center;
+ }
+
+ /* Dark Mode */
+ @media (prefers-color-scheme: dark) {
+     body { @extend .dark; }
+ }
+ /* Smaller size for input image */
+ .input-image img {
+     max-width: 300px;
+     height: auto;
+ }
+ /* Larger size for output image */
+ .output-image img {
+     max-width: 500px;
+     height: auto;
+ }
+
+ /* Add styling for warning message */
+ .warning-message {
+     color: red;
+     font-size: 14px;
+     margin-top: 5px;
+     display: block;
+ }
+ #warning-text {
+     min-height: 20px; /* Ensure space for warning */
+ }
+ /* Components for Gradio App */
+ .quote-container {
+     border-left: 5px solid #007bff;
+     padding-left: 15px;
+     margin-bottom: 15px;
+     font-style: italic;
+ }
+ .attribution p {
+     margin: 10px 0;
+ }
+ .badge {
+     display: inline-block;
+     border-radius: 4px;
+     text-decoration: none;
+     font-size: 14px;
+     transition: background-color 0.3s;
+ }
+ .badge:hover {
+     background-color: #0056b3;
+ }
+ .badge img {
+     vertical-align: middle;
+     margin-right: 5px;
+ }
+ .source {
+     font-size: 14px;
+ }
+
+ /* Start / Stop buttons */
+ .submit-btn {
+     background-color: #f97316; /* Orange background */
+     color: white;
+     font-weight: bold;
+     padding: 8px 16px;
+     border-radius: 6px;
+     border: none;
+     cursor: pointer;
+     transition: background-color 0.3s ease;
+ }
+
+ .submit-btn:hover {
+     background-color: #f97416de; /* Slightly translucent orange on hover */
+ }
+
+ .stop-btn {
+     background-color: grey; /* Grey background */
+     color: white;
+     font-weight: 600;
+     padding: 8px 16px;
+     border-radius: 6px;
+     border: none;
+     cursor: pointer;
+     transition: background-color 0.3s ease;
+ }
+
+ .stop-btn:hover {
+     background-color: rgba(128, 128, 128, 0.858); /* Semi-transparent grey on hover */
+ }
apps/old-gradio_app.py ADDED
@@ -0,0 +1,177 @@
+ import os
+ import sys
+ import subprocess
+ import gradio as gr
+ import torch
+ import random
+
+ # Add the project root directory to the Python path
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ from src.controlnet_image_generator.infer import infer
+
+ def run_setup_script():
+     setup_script = os.path.join(os.path.dirname(__file__), "gradio_app", "setup_scripts.py")
+     try:
+         result = subprocess.run(["python", setup_script], capture_output=True, text=True, check=True)
+         return result.stdout
+     except subprocess.CalledProcessError as e:
+         print(f"Setup script failed with error: {e.stderr}")
+         return f"Setup script failed: {e.stderr}"
+
+ def run_inference(
+     input_image,
+     prompt,
+     negative_prompt,
+     num_steps,
+     seed,
+     width,
+     height,
+     guidance_scale,
+     controlnet_conditioning_scale,
+     use_random_seed=False,
+ ):
+     config_path = "configs/model_ckpts.yaml"
+
+     if use_random_seed:
+         seed = random.randint(0, 2 ** 32)
+
+     try:
+         result = infer(
+             config_path=config_path,
+             input_image=input_image,
+             image_url=None,
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             num_steps=num_steps,
+             seed=seed,
+             width=width,
+             height=height,
+             guidance_scale=guidance_scale,
+             controlnet_conditioning_scale=float(controlnet_conditioning_scale),
+         )
+         result = list(result)[0]
+         return result, "Inference completed successfully"
+     except Exception as e:
+         return [], f"Error during inference: {str(e)}"
+
+ def stop_app():
+     """Function to stop the Gradio app."""
+     try:
+         gr.close_all()  # Close all running Gradio interfaces
+         return "Application stopped successfully."
+     except Exception as e:
+         return f"Error stopping application: {str(e)}"
+
+ def create_gui():
+     custom_css = open("apps/gradio_app/static/style.css").read()
+     with gr.Blocks(css=custom_css) as demo:
+         gr.Markdown("# ControlNet Image Generation with Pose Detection")
+
+         with gr.Row():
+             with gr.Column():
+                 input_image = gr.Image(type="filepath", label="Input Image")
+                 prompt = gr.Textbox(
+                     label="Prompt",
+                     value="a man is doing yoga"
+                 )
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt",
+                     value="monochrome, lowres, bad anatomy, worst quality, low quality"
+                 )
+
+                 with gr.Row():
+                     width = gr.Slider(
+                         minimum=256,
+                         maximum=1024,
+                         value=512,
+                         step=64,
+                         label="Width"
+                     )
+                     height = gr.Slider(
+                         minimum=256,
+                         maximum=1024,
+                         value=512,
+                         step=64,
+                         label="Height"
+                     )
+
+                 with gr.Accordion("Advanced Settings", open=False):
+                     num_steps = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         value=30,
+                         step=1,
+                         label="Number of Inference Steps"
+                     )
+                     use_random_seed = gr.Checkbox(label="Use Random Seed", value=False)
+                     seed = gr.Slider(
+                         minimum=0,
+                         maximum=2**32,
+                         value=42,
+                         step=1,
+                         label="Random Seed",
+                         visible=True
+                     )
+
+                     guidance_scale = gr.Slider(
+                         minimum=1.0,
+                         maximum=20.0,
+                         value=7.5,
+                         step=0.1,
+                         label="Guidance Scale"
+                     )
+                     controlnet_conditioning_scale = gr.Slider(
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=1.0,
+                         step=0.1,
+                         label="ControlNet Conditioning Scale"
+                     )
+
+             with gr.Column():
+                 output_images = gr.Image(label="Generated Images")
+                 output_message = gr.Textbox(label="Status")
+
+         # with gr.Row():
+         submit_button = gr.Button("Generate Images", elem_classes="submit-btn")
+         stop_button = gr.Button("Stop Application", elem_classes="stop-btn")
+
+         def update_seed_visibility(use_random):
+             return gr.update(visible=not use_random)
+
+         use_random_seed.change(
+             fn=update_seed_visibility,
+             inputs=use_random_seed,
+             outputs=seed
+         )
+
+         submit_button.click(
+             fn=run_inference,
+             inputs=[
+                 input_image,
+                 prompt,
+                 negative_prompt,
+                 num_steps,
+                 seed,
+                 width,
+                 height,
+                 guidance_scale,
+                 controlnet_conditioning_scale,
+                 use_random_seed,
+             ],
+             outputs=[output_images, output_message]
+         )
+
+         stop_button.click(
+             fn=stop_app,
+             inputs=[],
+             outputs=[output_message]
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     run_setup_script()
+     demo = create_gui()
+     demo.launch(share=True)
assets/.gitkeep ADDED
File without changes
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/a_man_is_doing_yoga_in_a_serene_park_0.png ADDED

Git LFS Details

  • SHA256: 3dc2b7efb61afd2d6ceda1b32ec9792a5b07f3ac3d7a96d7acdd2102ddb957b7
  • Pointer size: 131 Bytes
  • Size of remote file: 367 kB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "yoga.jpg",
+     "output_image": "a_man_is_doing_yoga_in_a_serene_park_0.png",
+     "prompt": "A man is doing yoga in a serene park.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 50,
+     "seed": 100,
+     "width": 512,
+     "height": 512,
+     "guidance_scale": 5.5,
+     "controlnet_conditioning_scale": 0.6
+ }
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/1/yoga.jpg ADDED
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/a_man_is_galloping_on_a_horse_0.png ADDED

Git LFS Details

  • SHA256: 2e83cc3b007c2303e276b3ac60a8fa930877e584e3534f12e1441ec83ed9e9fd
  • Pointer size: 132 Bytes
  • Size of remote file: 1.11 MB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "ride_bike.jpg",
+     "output_image": "a_man_is_galloping_on_a_horse_0.png",
+     "prompt": "A man is galloping on a horse.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 100,
+     "seed": 56,
+     "width": 1080,
+     "height": 720,
+     "guidance_scale": 9.5,
+     "controlnet_conditioning_scale": 0.5
+ }
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/2/ride_bike.jpg ADDED

Git LFS Details

  • SHA256: 76310cad16fcf71097c9660d46a95ced0992d48bd92469e83fd25ee59f015998
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/a_woman_is_holding_a_baseball_bat_in_her_hand_0.png ADDED

Git LFS Details

  • SHA256: a048958e0ed28806ecb7c9834f91b07a464b73cd641fa19b03f39ff542986530
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "tennis.jpg",
+     "output_image": "a_woman_is_holding_a_baseball_bat_in_her_hand_0.png",
+     "prompt": "A woman is holding a baseball bat in her hand.",
+     "negative_prompt": "monochrome, lowres, bad anatomy, ugly, deformed face",
+     "num_steps": 100,
+     "seed": 765,
+     "width": 990,
+     "height": 720,
+     "guidance_scale": 6.5,
+     "controlnet_conditioning_scale": 0.7
+ }
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/3/tennis.jpg ADDED

Git LFS Details

  • SHA256: 259845edb5c365bccb33f9207630d829bb5a839e72bf7d0326f11ae4862694fa
  • Pointer size: 132 Bytes
  • Size of remote file: 5.61 MB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/a_woman_raises_a_katana_0.png ADDED

Git LFS Details

  • SHA256: deaa70aba05ab58ea0f9bd16512c6dcc7e0951559037779063045b7c035342f8
  • Pointer size: 131 Bytes
  • Size of remote file: 441 kB
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/config.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "input_image": "man_and_sword.jpg",
+     "output_image": "a_woman_raises_a_katana_0.png",
+     "prompt": "A woman raises a katana.",
+     "negative_prompt": "body elongated, fragmentation, many hands, ugly, deformed face",
+     "num_steps": 50,
+     "seed": 78,
+     "width": 540,
+     "height": 512,
+     "guidance_scale": 6.5,
+     "controlnet_conditioning_scale": 0.8
+ }
assets/examples/Stable-Diffusion-2.1-Openpose-ControlNet/4/man_and_sword.jpg ADDED
ckpts/.gitignore ADDED
@@ -0,0 +1,2 @@
+ *
+ !.gitignore
configs/.gitkeep ADDED
File without changes
configs/datasets_info.yaml ADDED
@@ -0,0 +1,3 @@
+ - dataset_name: "HighCWu/open_pose_controlnet_subset"
+   local_dir: "HighCWu-open_pose_controlnet_subset"
+   platform: "HuggingFace"
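
The YAML above drives the dataset download step. As a rough, hypothetical sketch (the actual logic lives in scripts/download_datasets.py, which this truncated view does not show), the entries might be consumed like this, assuming huggingface_hub handles HuggingFace-platform entries and that local_dir is resolved under data/:

```python
# Hypothetical sketch only — scripts/download_datasets.py is part of this commit
# but its contents are not visible in this 50-file view.
import yaml
from huggingface_hub import snapshot_download

with open("configs/datasets_info.yaml") as f:
    datasets = yaml.safe_load(f)  # a list of {dataset_name, local_dir, platform}

for ds in datasets:
    if ds["platform"] == "HuggingFace":
        snapshot_download(
            repo_id=ds["dataset_name"],
            repo_type="dataset",                  # dataset repo, not a model repo
            local_dir=f"data/{ds['local_dir']}",  # assumed destination under data/
        )
```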
configs/model_ckpts.yaml ADDED
@@ -0,0 +1,16 @@
+ - model_id: "danhtran2mind/Stable-Diffusion-2.1-Openpose-ControlNet"
+   local_dir: "ckpts/Stable-Diffusion-2.1-Openpose-ControlNet"
+   allow:
+     - diffusion_pytorch_model.safetensors
+     - config.json
+
+ - model_id: "stabilityai/stable-diffusion-2-1"
+   local_dir: "ckpts/stable-diffusion-2-1"
+   deny:
+     - v2-1_768-ema-pruned.ckpt
+     - v2-1_768-ema-pruned.safetensors
+     - v2-1_768-nonema-pruned.ckpt
+     - v2-1_768-nonema-pruned.safetensors
+
+ - model_id: "lllyasviel/ControlNet"
+   local_dir: null
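
This manifest is the config_path that apps/gradio_app/inference.py passes to infer, and scripts/download_ckpts.py reads it to fetch the checkpoints. A hedged sketch of the download side, assuming the allow/deny keys map onto huggingface_hub's allow_patterns/ignore_patterns and that a null local_dir means the entry is skipped:

```python
# Hypothetical sketch only — the real implementation is scripts/download_ckpts.py,
# which this truncated view does not show.
import yaml
from huggingface_hub import snapshot_download

with open("configs/model_ckpts.yaml") as f:
    entries = yaml.safe_load(f)

for entry in entries:
    if entry.get("local_dir") is None:
        continue  # e.g., lllyasviel/ControlNet has no download target here
    snapshot_download(
        repo_id=entry["model_id"],
        local_dir=entry["local_dir"],
        allow_patterns=entry.get("allow"),   # None downloads everything
        ignore_patterns=entry.get("deny"),   # skip the large single-file weights
    )
```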
data/.gitignore ADDED
@@ -0,0 +1,2 @@
+ *
+ !.gitignore
docs/inference/inference_doc.md ADDED
@@ -0,0 +1,176 @@
+ # ControlNet Image Generation with Pose Detection
+
+ This document provides a comprehensive overview of a Python script for image generation using ControlNet with pose detection, integrated with the Stable Diffusion model. The script processes an input image to detect human poses and generates new images based on a text prompt, guided by the detected poses.
+
+ ## Purpose
+
+ The script enables users to generate images that adhere to specific poses extracted from an input image, combining ControlNet's pose conditioning with Stable Diffusion's high-quality image synthesis. It is particularly useful for applications requiring pose-guided image generation, such as creating stylized images of people in specific poses (e.g., yoga, dancing) based on a reference image.
+
+ ## Dependencies
+
+ The script relies on the following Python libraries and custom modules:
+
+ - **Standard Libraries**:
+   - `torch`: For tensor operations and deep learning model handling.
+   - `argparse`: For parsing command-line arguments.
+   - `os`: For file and directory operations.
+   - `sys`: For modifying the Python path to include the project root.
+
+ - **Custom Modules** (assumed to be part of the project structure):
+   - `inference.config_loader`:
+     - `load_config`: Loads model configurations from a YAML file.
+     - `find_config_by_model_id`: Retrieves specific model configurations by ID.
+   - `inference.model_initializer`:
+     - `initialize_controlnet`: Initializes the ControlNet model.
+     - `initialize_pipeline`: Initializes the Stable Diffusion pipeline.
+     - `initialize_controlnet_detector`: Initializes the pose detection model.
+   - `inference.device_manager`:
+     - `setup_device`: Configures the computation device (e.g., CPU or GPU).
+   - `inference.image_processor`:
+     - `load_input_image`: Loads the input image from a local path or URL.
+     - `detect_poses`: Detects human poses in the input image.
+   - `inference.image_generator`:
+     - `generate_images`: Generates images using the pipeline and pose conditions.
+     - `save_images`: Saves generated images to the specified directory.
+
+ ## Script Structure
+
+ The script is organized into the following components:
+
+ 1. **Imports and Path Setup**:
+    - Imports necessary libraries and adds the project root directory to the Python path for accessing custom modules.
+    - Ensures the script can locate custom modules regardless of the execution context.
+
+ 2. **Global Variables**:
+    - Defines three global variables to cache initialized models (see the sketch after this list):
+      - `controlnet_detector`: For pose detection.
+      - `controlnet`: For pose-guided conditioning.
+      - `pipe`: The Stable Diffusion pipeline.
+    - These variables persist across multiple calls to the `infer` function to avoid redundant model initialization.
+
+ 3. **Main Function: `infer`**:
+    - The core function that orchestrates the image generation process.
+    - Takes configurable parameters for input, model settings, and output options.
+
+ 4. **Command-Line Interface**:
+    - Uses `argparse` to provide a user-friendly interface for running the script with customizable parameters.
+
58
+ ## Main Function: `infer`
59
+
60
+ The `infer` function handles the end-to-end process of loading models, processing input images, detecting poses, generating images, and optionally saving the results.
61
+
62
+ ### Parameters
63
+
64
+ | Parameter | Type | Description | Default |
65
+ |-----------|------|-------------|---------|
66
+ | `config_path` | `str` | Path to the configuration YAML file. | `"configs/model_ckpts.yaml"` |
67
+ | `input_image` | `str` | Path to the local input image. Mutually exclusive with `image_url`. | `None` |
68
+ | `image_url` | `str` | URL of the input image. Mutually exclusive with `input_image`. | `None` |
69
+ | `prompt` | `str` | Text prompt for image generation. | `"a man is doing yoga"` |
70
+ | `negative_prompt` | `str` | Negative prompt to avoid undesired features. | `"monochrome, lowres, bad anatomy, worst quality, low quality"` |
71
+ | `num_steps` | `int` | Number of inference steps. | `20` |
72
+ | `seed` | `int` | Random seed for reproducibility. | `2` |
73
+ | `width` | `int` | Width of the generated image (pixels). | `512` |
74
+ | `height` | `int` | Height of the generated image (pixels). | `512` |
75
+ | `guidance_scale` | `float` | Guidance scale for prompt adherence. | `7.5` |
76
+ | `controlnet_conditioning_scale` | `float` | ControlNet conditioning scale for pose influence. | `1.0` |
77
+ | `output_dir` | `str` | Directory to save generated images. | `tests/test_data` |
78
+ | `use_prompt_as_output_name` | `bool` | Use prompt in output filenames. | `False` |
79
+ | `save_output` | `bool` | Save generated images to `output_dir`. | `False` |
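+
+ For orientation, here is a minimal sketch of calling `infer` from Python rather than the CLI. It assumes the script is importable as a module; the import path `infer_script` is illustrative, not the actual file name:
+
+ ```python
+ # Hypothetical import path; adjust to the actual script/module name.
+ from infer_script import infer
+
+ images = infer(
+     config_path="configs/model_ckpts.yaml",
+     input_image="tests/test_data/yoga1.jpg",  # or image_url=... (mutually exclusive)
+     prompt="a man is doing yoga",
+     negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
+     num_steps=20,
+     seed=2,
+     width=512,
+     height=512,
+     guidance_scale=7.5,
+     controlnet_conditioning_scale=1.0,
+     save_output=True,
+     output_dir="tests/test_data",
+ )
+ # One image is returned per detected pose.
+ ```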
+
+ ### Workflow
+
+ 1. **Configuration Loading**:
+    - Loads model configurations from `config_path` using `load_config`.
+    - Retrieves specific configurations for:
+      - Pose detection model (`lllyasviel/ControlNet`).
+      - ControlNet model (`danhtran2mind/Stable-Diffusion-2.1-Openpose-ControlNet`).
+      - Stable Diffusion pipeline (`stabilityai/stable-diffusion-2-1`).
+
+ 2. **Model Initialization**:
+    - Checks whether `controlnet_detector`, `controlnet`, or `pipe` is `None`.
+    - If so, initializes them from the respective configurations to avoid redundant loading.
+
+ 3. **Device Setup**:
+    - Configures the computation device (e.g., CPU or GPU) for the pipeline using `setup_device`.
+
+ 4. **Image Processing**:
+    - Loads the input image from either `input_image` or `image_url` using `load_input_image`.
+    - Detects poses in the input image using `detect_poses` with the `controlnet_detector`.
+
+ 5. **Image Generation**:
+    - Creates a list of random number generators seeded with `seed + i`, one per detected pose (see the sketch after this list).
+    - Generates images using `generate_images`, passing:
+      - The pipeline (`pipe`).
+      - Repeated prompts and negative prompts for each pose.
+      - Detected poses as conditioning inputs.
+      - Generators for reproducibility.
+      - Parameters such as `num_steps`, `guidance_scale`, `controlnet_conditioning_scale`, `width`, and `height`.
+
+ 6. **Output Handling**:
+    - If `save_output` is `True`, saves the generated images to `output_dir` using `save_images`.
+    - If `use_prompt_as_output_name` is `True`, incorporates the prompt into the output filenames.
+    - Returns the list of generated images.
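+
+ The per-pose seeding in step 5 can be sketched as follows, assuming `torch` generators (the usual diffusers convention); the variable names `seed` and `poses` are illustrative:
+
+ ```python
+ import torch
+
+ # One deterministic generator per detected pose: pose i gets seed + i,
+ # so each pose-conditioned image is independently reproducible.
+ seed = 2
+ poses = [...]  # output of detect_poses(...)
+ generators = [
+     torch.Generator(device="cpu").manual_seed(seed + i)
+     for i in range(len(poses))
+ ]
+ ```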
+
+ ## Command-Line Interface
+
+ The script includes a command-line interface using `argparse` for flexible execution.
+
+ ### Arguments Table
+
+ | Argument | Type | Default Value | Description |
+ |----------|------|---------------|-------------|
+ | `--input_image` | `str` | `tests/test_data/yoga1.jpg` | Path to the local input image. Mutually exclusive with `--image_url`. |
+ | `--image_url` | `str` | `None` | URL of the input image (e.g., `https://huggingface.co/datasets/YiYiXu/controlnet-testing/resolve/main/yoga1.jpeg`). Mutually exclusive with `--input_image`. |
+ | `--config_path` | `str` | `configs/model_ckpts.yaml` | Path to the configuration YAML file for model settings. |
+ | `--prompt` | `str` | `"a man is doing yoga"` | Text prompt for image generation. |
+ | `--negative_prompt` | `str` | `"monochrome, lowres, bad anatomy, worst quality, low quality"` | Negative prompt to avoid undesired features in generated images. |
+ | `--num_steps` | `int` | `20` | Number of inference steps for image generation. |
+ | `--seed` | `int` | `2` | Random seed for reproducible generation. |
+ | `--width` | `int` | `512` | Width of the generated image in pixels. |
+ | `--height` | `int` | `512` | Height of the generated image in pixels. |
+ | `--guidance_scale` | `float` | `7.5` | Guidance scale for prompt adherence during generation. |
+ | `--controlnet_conditioning_scale` | `float` | `1.0` | ControlNet conditioning scale to balance pose influence. |
+ | `--output_dir` | `str` | `tests/test_data` | Directory to save generated images. |
+ | `--use_prompt_as_output_name` | Flag | `False` | If set, incorporates the prompt into output image filenames. |
+ | `--save_output` | Flag | `False` | If set, saves generated images to the specified output directory. |
+
+ ### Example Usage
+
+ ```bash
+ python script.py --input_image tests/test_data/yoga1.jpg --prompt "a woman doing yoga in a park" --num_steps 30 --guidance_scale 8.0 --save_output --use_prompt_as_output_name
+ ```
+
+ This command:
+ - Uses the local image `tests/test_data/yoga1.jpg` as input.
+ - Generates images with the prompt `"a woman doing yoga in a park"`.
+ - Runs for 30 inference steps with a guidance scale of 8.0.
+ - Saves the output images to `tests/test_data`, with filenames including the prompt.
+
+ Alternatively, using a URL:
+
+ ```bash
+ python script.py --image_url https://huggingface.co/datasets/YiYiXu/controlnet-testing/resolve/main/yoga1.jpeg --prompt "a person practicing yoga at sunset" --save_output
+ ```
+
+ This command uses an online image and saves the generated images without including the prompt in the filenames.
+
+ ## Notes
+
+ - **Configuration File**: The script assumes a `configs/model_ckpts.yaml` file exists with configurations for the required models (`lllyasviel/ControlNet`, `danhtran2mind/Stable-Diffusion-2.1-Openpose-ControlNet`, `stabilityai/stable-diffusion-2-1`). Ensure this file is correctly formatted and accessible.
+ - **Input Requirements**: The input image (local or URL) should contain at least one person for effective pose detection.
+ - **Model Caching**: Global variables cache the models to improve performance across multiple inferences within the same session (the pattern is sketched below).
+ - **Device Compatibility**: The `setup_device` function determines the computation device. Ensure compatible hardware (e.g., a GPU) is available for optimal performance.
+ - **Output Flexibility**: The script supports generating multiple images if multiple poses are detected, with each image conditioned on one pose.
+ - **Error Handling**: The script assumes the custom modules handle errors appropriately. Users should verify that input paths, URLs, and model configurations are valid.
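+
+ As a rough illustration of the caching pattern described in the **Model Caching** note — a minimal, self-contained sketch; the `_load_*` helpers are stand-ins for the real initializers in `inference.model_initializer`:
+
+ ```python
+ # Module-level caches survive repeated infer() calls in the same process.
+ controlnet_detector = None
+ controlnet = None
+ pipe = None
+
+ def _load_detector():
+     return object()  # stand-in for initialize_controlnet_detector(...)
+
+ def _load_controlnet():
+     return object()  # stand-in for initialize_controlnet(...)
+
+ def _load_pipeline():
+     return object()  # stand-in for initialize_pipeline(...)
+
+ def infer():
+     global controlnet_detector, controlnet, pipe
+     if controlnet_detector is None:  # first call pays the loading cost
+         controlnet_detector = _load_detector()
+     if controlnet is None:
+         controlnet = _load_controlnet()
+     if pipe is None:                 # later calls reuse the cached objects
+         pipe = _load_pipeline()
+ ```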
+
+ ## Potential Improvements
+
+ - Add error handling for invalid inputs or missing configuration files.
+ - Support batch processing for multiple input images.
+ - Allow dynamic model selection via command-line arguments instead of hardcoded model IDs.
+ - Include options for adjusting pose detection sensitivity or other model-specific parameters.
+
+ ## Conclusion
+
+ This script provides a robust framework for pose-guided image generation using ControlNet and Stable Diffusion. Its modular design and command-line interface make it suitable for both one-off experiments and integration into larger workflows. By leveraging pre-trained models and customizable parameters, it enables users to generate high-quality, pose-conditioned images with minimal setup.
docs/scripts/download_ckpts_doc.md ADDED
@@ -0,0 +1,29 @@
+ # Download Model Checkpoints
+
+ This script downloads model checkpoints from the Hugging Face Hub based on configurations specified in a YAML file.
+
+ ## Functionality
+ - **Load Configuration**: Reads a YAML configuration file to get model details.
+ - **Download Model**: Downloads files for the specified models from the Hugging Face Hub to a local directory (an example configuration entry follows this list).
+   - Checks for a valid `local_dir` in the configuration; skips the download if `local_dir` is null.
+   - Creates the local directory if it doesn't exist.
+   - Supports `allow` and `deny` patterns to filter files:
+     - If `allow` patterns are specified, only those files are downloaded.
+     - If no `allow` patterns are provided, all files are downloaded except those matching `deny` patterns.
+   - Uses `hf_hub_download` from the `huggingface_hub` library with symlinks disabled.
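+
+ For reference, the first entry in the repository's `configs/model_ckpts.yaml` has this shape:
+
+ ```yaml
+ - model_id: "danhtran2mind/Stable-Diffusion-2.1-Openpose-ControlNet"
+   local_dir: "ckpts/Stable-Diffusion-2.1-Openpose-ControlNet"
+   allow:
+     - diffusion_pytorch_model.safetensors
+     - config.json
+ ```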
+
+ ## Command-Line Arguments
+ - `--config_path`: Path to the YAML configuration file (defaults to `configs/model_ckpts.yaml`).
+
+ ## Dependencies
+ - `argparse`: For parsing command-line arguments.
+ - `os`: For directory creation.
+ - `yaml`: For reading the configuration file.
+ - `huggingface_hub`: For downloading files from the Hugging Face Hub.
+
+ ## Usage
+ Run the script with:
+ ```bash
+ python scripts/download_ckpts.py --config_path <path_to_yaml>
+ ```
+ The script processes each model in the configuration file, printing the model ID and local directory for each.
docs/scripts/download_datasets_doc.md ADDED
@@ -0,0 +1,20 @@
+ # Download Datasets
+
+ This script downloads datasets from Hugging Face using configuration details specified in a YAML file.
+
+ ## Functionality
+ - **Load Configuration**: Reads dataset details from a YAML configuration file.
+ - **Download Dataset**: Downloads datasets from Hugging Face if the platform is specified as `HuggingFace` in the configuration.
+ - **Command-Line Argument**: Accepts a path to the configuration file via the `--config_path` argument (defaults to `configs/datasets_info.yaml`).
+ - **Dataset Information**: Extracts the dataset name and local storage directory from the configuration, splits the dataset name into user and model hub components, and saves the dataset to the specified directory.
+ - **Verification**: Prints dataset details, including the user name, model hub name, storage location, and dataset information, for confirmation.
+ - **Platform Check**: Only processes datasets from Hugging Face; unsupported platforms are flagged with a message.
+
+ ## Usage
+ Run the script with:
+ ```bash
+ python scripts/download_datasets.py --config_path path/to/config.yaml
+ ```
+
+ The configuration file should contain:
+ - `dataset_name`: Formatted as `user_name/model_hub_name`.
+ - `local_dir`: Directory to save the dataset.
+ - `platform`: Must be set to `HuggingFace` for the script to process the entry (an example configuration follows).
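+
+ For reference, the repository's `configs/datasets_info.yaml` contains a single entry of exactly this shape:
+
+ ```yaml
+ - dataset_name: "HighCWu/open_pose_controlnet_subset"
+   local_dir: "HighCWu-open_pose_controlnet_subset"
+   platform: "HuggingFace"
+ ```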
docs/training/training_doc.md ADDED
@@ -0,0 +1,106 @@
+ # ControlNet Training Documentation
+
+ This document outlines the process for training a ControlNet model using the provided Python scripts (`train.py` and `train_controlnet.py`). The scripts facilitate training a ControlNet model integrated with a Stable Diffusion pipeline for conditional image generation. Below, we describe the training process and provide a detailed table of the command-line arguments used to configure the training.
+
+ ## Overview
+
+ The training process involves two main scripts:
+ 1. **`train.py`**: A wrapper script that executes `train_controlnet.py` with the provided command-line arguments.
+ 2. **`train_controlnet.py`**: The core script that handles the training of the ControlNet model, including dataset preparation, model initialization, the training loop, and validation.
+
+ ### Training Workflow
+ 1. **Argument Parsing**: The script parses command-line arguments to configure the training process, such as model paths, dataset details, and hyperparameters.
+ 2. **Dataset Preparation**: Loads and preprocesses the dataset (either from the HuggingFace Hub or a local directory) with transformations for images and captions.
+ 3. **Model Initialization**: Loads pretrained models (e.g., Stable Diffusion, VAE, UNet, text encoder) and initializes or loads ControlNet weights.
+ 4. **Training Loop**: Trains the ControlNet model using the Accelerate library for distributed training, with support for mixed precision, gradient checkpointing, and learning rate scheduling (see the launch sketch after this list).
+ 5. **Validation**: Periodically validates the model by generating images from validation prompts and images, logging results to TensorBoard or Weights & Biases.
+ 6. **Checkpointing and Saving**: Saves model checkpoints during training and the final model to the output directory. Optionally pushes the model to the HuggingFace Hub.
+ 7. **Model Card Creation**: Generates a model card with training details and example images for documentation.
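+
+ Since the training loop is built on Accelerate, multi-GPU runs would typically go through the standard Accelerate workflow. The following is a hedged sketch, assuming a completed `accelerate config` and assuming `train_controlnet.py` sits alongside `train.py` (the path is not confirmed by this document); the flags are taken from the argument table below:
+
+ ```bash
+ # One-time interactive setup of the Accelerate environment (GPUs, precision, etc.)
+ accelerate config
+
+ # Launch the core training script under Accelerate instead of plain python.
+ accelerate launch src/controlnet_image_generator/train_controlnet.py \
+     --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1" \
+     --dataset_name="HighCWu/open_pose_controlnet_subset" \
+     --output_dir="controlnet_output" \
+     --mixed_precision="fp16"
+ ```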
+
+ ## Command-Line Arguments
+
+ The following table describes the command-line arguments available in `train_controlnet.py` for configuring the training process:
+
+ | Argument | Type | Default | Description |
+ |----------|------|---------|-------------|
+ | `--pretrained_model_name_or_path` | `str` | None | Path to pretrained model or model identifier from huggingface.co/models. Required. |
+ | `--controlnet_model_name_or_path` | `str` | None | Path to pretrained ControlNet model or model identifier. If not specified, ControlNet weights are initialized from UNet. |
+ | `--revision` | `str` | None | Revision of pretrained model identifier from huggingface.co/models. |
+ | `--variant` | `str` | None | Variant of the model files (e.g., 'fp16'). |
+ | `--tokenizer_name` | `str` | None | Pretrained tokenizer name or path if different from model_name. |
+ | `--output_dir` | `str` | "controlnet-model" | Directory where model predictions and checkpoints are saved. |
+ | `--cache_dir` | `str` | None | Directory for storing downloaded models and datasets. |
+ | `--seed` | `int` | None | Seed for reproducible training. |
+ | `--resolution` | `int` | 512 | Resolution for input images (must be divisible by 8). |
+ | `--train_batch_size` | `int` | 4 | Batch size per device for the training dataloader. |
+ | `--num_train_epochs` | `int` | 1 | Number of training epochs. |
+ | `--max_train_steps` | `int` | None | Total number of training steps. Overrides `num_train_epochs` if provided. |
+ | `--checkpointing_steps` | `int` | 500 | Save a checkpoint every X updates. |
+ | `--checkpoints_total_limit` | `int` | None | Maximum number of checkpoints to store. |
+ | `--resume_from_checkpoint` | `str` | None | Resume training from a previous checkpoint path or "latest". |
+ | `--gradient_accumulation_steps` | `int` | 1 | Number of update steps to accumulate before a backward pass. |
+ | `--gradient_checkpointing` | `flag` | False | Enable gradient checkpointing to save memory at the cost of slower backward passes. |
+ | `--learning_rate` | `float` | 5e-6 | Initial learning rate after warmup. |
+ | `--scale_lr` | `flag` | False | Scale learning rate by number of GPUs, gradient accumulation steps, and batch size. |
+ | `--lr_scheduler` | `str` | "constant" | Learning rate scheduler type: ["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"]. |
+ | `--lr_warmup_steps` | `int` | 500 | Number of steps for learning rate warmup. |
+ | `--lr_num_cycles` | `int` | 1 | Number of hard resets for cosine_with_restarts scheduler. |
+ | `--lr_power` | `float` | 1.0 | Power factor for polynomial scheduler. |
+ | `--use_8bit_adam` | `flag` | False | Use 8-bit Adam optimizer from bitsandbytes for lower memory usage. |
+ | `--dataloader_num_workers` | `int` | 0 | Number of subprocesses for data loading (0 means main process). |
+ | `--adam_beta1` | `float` | 0.9 | Beta1 parameter for Adam optimizer. |
+ | `--adam_beta2` | `float` | 0.999 | Beta2 parameter for Adam optimizer. |
+ | `--adam_weight_decay` | `float` | 1e-2 | Weight decay for Adam optimizer. |
+ | `--adam_epsilon` | `float` | 1e-08 | Epsilon value for Adam optimizer. |
+ | `--max_grad_norm` | `float` | 1.0 | Maximum gradient norm for clipping. |
+ | `--push_to_hub` | `flag` | False | Push the model to the HuggingFace Hub. |
+ | `--hub_token` | `str` | None | Token for pushing to the HuggingFace Hub. |
+ | `--hub_model_id` | `str` | None | Repository name for syncing with `output_dir`. |
+ | `--logging_dir` | `str` | "logs" | TensorBoard log directory. |
+ | `--allow_tf32` | `flag` | False | Allow TF32 on Ampere GPUs for faster training. |
+ | `--report_to` | `str` | "tensorboard" | Integration for logging: ["tensorboard", "wandb", "comet_ml", "all"]. |
+ | `--mixed_precision` | `str` | None | Mixed precision training: ["no", "fp16", "bf16"]. |
+ | `--enable_xformers_memory_efficient_attention` | `flag` | False | Enable xformers for memory-efficient attention. |
+ | `--set_grads_to_none` | `flag` | False | Set gradients to None instead of zero to save memory. |
+ | `--dataset_name` | `str` | None | Name of the dataset from HuggingFace Hub or local path. |
+ | `--dataset_config_name` | `str` | None | Dataset configuration name. |
+ | `--train_data_dir` | `str` | None | Directory containing training data with `metadata.jsonl`. |
+ | `--image_column` | `str` | "image" | Dataset column for target images. |
+ | `--conditioning_image_column` | `str` | "conditioning_image" | Dataset column for ControlNet conditioning images. |
+ | `--caption_column` | `str` | "text" | Dataset column for captions. |
+ | `--max_train_samples` | `int` | None | Truncate training examples to this number for debugging or quicker training. |
+ | `--proportion_empty_prompts` | `float` | 0 | Proportion of prompts to replace with empty strings (0 to 1). |
+ | `--validation_prompt` | `str` | None | Prompts for validation, evaluated every `validation_steps`. |
+ | `--validation_image` | `str` | None | Paths to ControlNet conditioning images for validation. |
+ | `--num_validation_images` | `int` | 4 | Number of images generated per validation prompt-image pair. |
+ | `--validation_steps` | `int` | 100 | Run validation every X steps. |
+ | `--tracker_project_name` | `str` | "train_controlnet" | Project name for Accelerator trackers. |
+
+ ## Usage Example
+
+ To train a ControlNet model, run the following command:
+
+ ```bash
+ python src/controlnet_image_generator/train.py \
+     --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1" \
+     --dataset_name="huggingface/controlnet-dataset" \
+     --output_dir="controlnet_output" \
+     --resolution=512 \
+     --train_batch_size=4 \
+     --num_train_epochs=3 \
+     --learning_rate=1e-5 \
+     --validation_prompt="A cat sitting on a chair" \
+     --validation_image="path/to/conditioning_image.png" \
+     --push_to_hub \
+     --hub_model_id="your-username/controlnet-model"
+ ```
+
+ This command trains a ControlNet model using the Stable Diffusion 2.1 pretrained model and the specified dataset, and pushes the trained model to the HuggingFace Hub via `--push_to_hub`.
+
+ ## Notes
+ - Ensure the dataset contains columns for target images, conditioning images, and captions, as specified by `image_column`, `conditioning_image_column`, and `caption_column`.
+ - The resolution must be divisible by 8 to ensure compatibility with the VAE and ControlNet encoder.
+ - Mixed precision training (`fp16` or `bf16`) can reduce memory usage but requires compatible hardware; a memory-saving example follows these notes.
+ - Validation images and prompts must be provided in matching quantities, or as single values to be reused across pairs.
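+
+ As an illustration of combining the memory-related flags from the table above — a hedged example, not a prescribed configuration (effectiveness depends on your GPU and on working `bitsandbytes` installs):
+
+ ```bash
+ python src/controlnet_image_generator/train.py \
+     --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1" \
+     --dataset_name="HighCWu/open_pose_controlnet_subset" \
+     --output_dir="controlnet_output" \
+     --mixed_precision="fp16" \
+     --gradient_checkpointing \
+     --use_8bit_adam \
+     --set_grads_to_none \
+     --gradient_accumulation_steps=4 \
+     --train_batch_size=1
+ ```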
+
+ For further details, refer to the source scripts or the HuggingFace Diffusers documentation.
notebooks/SD-2.1-Openpose-ControlNet.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements/requirements.txt ADDED
@@ -0,0 +1,7 @@
+ huggingface-hub>=0.33.1
+ bitsandbytes>=0.46.0
+ diffusers>=0.34.0
+ peft>=0.17.0
+ controlnet-aux>=0.0.10
+ accelerate>=1.7.0
+ gradio>=5.39.0
requirements/requirements_compatible.txt ADDED
@@ -0,0 +1,7 @@
+ huggingface-hub==0.34.1
+ bitsandbytes==0.46.0
+ diffusers==0.34.0
+ peft==0.17.0
+ controlnet-aux==0.0.10
+ accelerate==1.7.0
+ gradio==5.39.0
scripts/download_ckpts.py ADDED
@@ -0,0 +1,58 @@
+ import argparse
+ import os
+ import yaml
+ from huggingface_hub import hf_hub_download, list_repo_files
+
+ def load_config(config_path):
+     """Load the model checkpoint configuration from a YAML file."""
+     with open(config_path, 'r') as file:
+         return yaml.safe_load(file)
+
+ def download_model(model_config):
+     """Download one model's files according to its allow/deny patterns."""
+     model_id = model_config["model_id"]
+     local_dir = model_config["local_dir"]
+
+     # A null local_dir marks models that are not downloaded by this script.
+     if local_dir is None:
+         print(f"Skipping download for {model_id}: local_dir is null")
+         return
+
+     os.makedirs(local_dir, exist_ok=True)
+
+     allow_patterns = model_config.get("allow", [])
+     deny_patterns = model_config.get("deny", [])
+
+     if allow_patterns:
+         # Download only the explicitly allowed files.
+         for file in allow_patterns:
+             hf_hub_download(
+                 repo_id=model_id,
+                 filename=file,
+                 local_dir=local_dir,
+                 local_dir_use_symlinks=False
+             )
+     else:
+         print(f"No allow patterns specified for {model_id}. Attempting to download all files except those in deny list.")
+         # Enumerate the repository and skip any file matching a deny pattern.
+         repo_files = list_repo_files(repo_id=model_id)
+         for file in repo_files:
+             if not any(deny_pattern in file for deny_pattern in deny_patterns):
+                 hf_hub_download(
+                     repo_id=model_id,
+                     filename=file,
+                     local_dir=local_dir,
+                     local_dir_use_symlinks=False
+                 )
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Download model checkpoints from Hugging Face Hub")
+     parser.add_argument(
+         "--config_path",
+         type=str,
+         default="configs/model_ckpts.yaml",
+         help="Path to the configuration YAML file"
+     )
+
+     args = parser.parse_args()
+
+     config = load_config(args.config_path)
+
+     for model_config in config:
+         print(f"Processing {model_config['model_id']} (local_dir: {model_config['local_dir']})")
+         download_model(model_config)
scripts/download_datasets.py ADDED
@@ -0,0 +1,48 @@
+ import argparse
+ import yaml
+ from datasets import load_dataset
+
+
+ def load_config(config_path):
+     with open(config_path, 'r') as file:
+         return yaml.safe_load(file)
+
+
+ def download_huggingface_dataset(config):
+     # Get dataset details from config
+     dataset_name = config['dataset_name']
+     local_dir = config['local_dir']
+
+     # Split dataset name into user_name and model_hub_name
+     user_name, model_hub_name = dataset_name.split('/')
+
+     # Login using e.g. `huggingface-cli login` to access this dataset
+     ds = load_dataset(dataset_name, cache_dir=local_dir)
+
+     # Print information for verification
+     print(f"User Name: {user_name}")
+     print(f"Model Hub Name: {model_hub_name}")
+     print(f"Dataset saved to: {local_dir}")
+     print(f"Dataset info: {ds}")
+
+
+ if __name__ == "__main__":
+     # Set up argument parser
+     parser = argparse.ArgumentParser(description="Download dataset from Hugging Face")
+     parser.add_argument('--config_path',
+                         type=str,
+                         default='configs/datasets_info.yaml',
+                         help='Path to the dataset configuration YAML file')
+
+     args = parser.parse_args()
+
+     # Load configuration from YAML file
+     configs = load_config(args.config_path)
+
+     # Iterate through the list of configurations
+     for config in configs:
+         # Download dataset if platform is HuggingFace
+         if config['platform'] == 'HuggingFace':
+             download_huggingface_dataset(config)
+         else:
+             print(f"Unsupported platform: {config['platform']}")
scripts/setup_third_party.py ADDED
@@ -0,0 +1,38 @@
+ import os
+ import subprocess
+ import sys
+ import argparse
+
+ def setup_diffusers(target_dir):
+     # Define paths
+     diffusers_dir = os.path.join(target_dir, "diffusers")
+
+     # Create the target directory if it doesn't exist
+     os.makedirs(target_dir, exist_ok=True)
+
+     # Check if diffusers already exists in the target directory
+     if os.path.exists(diffusers_dir):
+         print(f"Diffusers already exists in {target_dir}. Skipping clone.")
+         return
+
+     # Clone the diffusers repository
+     subprocess.run(["git", "clone", "https://github.com/huggingface/diffusers"],
+                    cwd=target_dir, check=True)
+
+     # Change to the diffusers directory and install it in editable mode,
+     # using the current interpreter's pip to target the right environment
+     original_dir = os.getcwd()
+     os.chdir(diffusers_dir)
+     try:
+         subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], check=True)
+     finally:
+         os.chdir(original_dir)
+
+     print(f"Diffusers successfully cloned and installed to {diffusers_dir}")
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Setup diffusers in a specified directory.")
+     parser.add_argument("--target-dir", type=str, default="src/third_party",
+                         help="Target directory to clone diffusers into (default: src/third_party)")
+
+     args = parser.parse_args()
+     setup_diffusers(args.target_dir)