Spaces:

Kwai-Kolors
/

CoTyle

Running on Zero

App Files Files Community

liuhuijie committed on Oct 16

Commit

424c702

1 Parent(s): 399083d

update

Browse files

Files changed (16) hide show

.gitattributes +1 -0
README.md +1 -1
app-ori.py +0 -310
app.py +328 -88
assets/10241024.jpg +3 -0
assets/1234567.jpg +3 -0
assets/4396.jpg +3 -0
assets/666666666.jpg +3 -0
assets/886.jpg +3 -0
models/__pycache__/model.cpython-310.pyc +0 -0
models/__pycache__/pipe.cpython-310.pyc +0 -0
models/__pycache__/quant.cpython-310.pyc +0 -0
models/__pycache__/utils.cpython-310.pyc +0 -0
models/__pycache__/vitamin.cpython-310.pyc +0 -0
models/__pycache__/vlm_unitok.cpython-310.pyc +0 -0
models/__pycache__/vqvae.cpython-310.pyc +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 title: CoTyle
 emoji: 🎨
-colorFrom: blue
 colorTo: purple
 sdk: gradio
 sdk_version: 5.49.1

 ---
 title: CoTyle
 emoji: 🎨
+colorFrom: #c8c8c8
 colorTo: purple
 sdk: gradio
 sdk_version: 5.49.1

app-ori.py DELETED Viewed

@@ -1,310 +0,0 @@
-print('v4')
-import os
-import torch
-from PIL import Image
-from io import BytesIO
-import json
-from huggingface_hub import login, hf_hub_download
-import spaces
-import gradio as gr
-token=os.environ.get("HF_TOKEN")
-login(token=os.environ.get("HF_TOKEN"))
-REPO_ID = "Kwai-Kolors/cotyle"
-# Use GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-weight_type = torch.bfloat16 if device == "cuda" else torch.float32
-# Predefined suggested prompts (already in English)
-SUGGESTED_PROMPTS = [
-    "An artist sits outdoors, engrossed in their work, brush in hand, capturing the scene with focused intensity. On the canvas, trees and buildings blend seamlessly with the real-world surroundings. Symbols from different cultures, along with animals, plants, and abstract lines, float around them. As the brush touches the canvas, the paint transforms into points of light that scatter, while sheets of paper and flower petals flutter in the air, creating a sense of movement. The atmosphere is a high-detail fusion of art and reality.",
-    "Seagulls soar along the seaside under the setting sun, as a couple in wedding attire holds hands.",
-    "A cute, chubby werewolf holds a balloon and candy, looking adorably mischievous. The background features a full moon on a night sky.",
-    "A classical beauty, dressed in a dreamy, light pink flowing gown with wide sleeves, adorned with countless tiny wind crystals.",
-    "The train sped swiftly across a large bridge.",
-    "In front of the door stands an apple tree with two apples glistening with dewdrops. A beautiful little bird with vibrant feathers perches on a branch, displaying intricate textures and clear details.",
-]
-CUSTOM_OPTION = "✍️ Enter custom prompt..."
-# Lazy load models to avoid slow startup
-def load_models():
-    global pipeline, style_generator, unitok, processor, code_freq
-    if 'pipeline' in globals():
-        return  # Already loaded
-    from models.pipe import CoTylePipeline
-    from models.vlm_unitok import UniTok
-    from models.model import StyleGenerator, Qwen2_5_VLForConditionalGeneration_Quant, Qwen2_5_VL_Quant
-    from models.utils import set_seed, patched_from_model_config
-    from transformers import Qwen2VLProcessor
-    from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
-    from diffusers.models import AutoencoderKLQwenImage, QwenImageTransformer2DModel
-    from transformers.generation.configuration_utils import GenerationConfig
-    _original_from_model_config = GenerationConfig.from_model_config
-    GenerationConfig.from_model_config = classmethod(patched_from_model_config)
-    model_path = "Kwai-Kolors/cotyle"
-    unitok_config = {
-        'unitok_embed_dim': 3584,
-        'unitok_vocab_width': 64,
-        'unitok_vocab_size': 1024,
-        'unitok_e_temp': 0.01,
-        'unitok_num_codebooks': 1,
-        'unitok_le': 0.0
-    }
-    # Load Style Generator
-    style_generator_path = hf_hub_download(
-        repo_id=model_path,
-        filename='prior',
-        token=token,
-    )
-    from transformers import AutoConfig
-    config = AutoConfig.from_pretrained(f"{style_generator_path}/config.json")
-    style_generator = StyleGenerator._from_config(config)
-    state_dict = torch.load(f"{style_generator_path}/prior.pth", map_location='cpu')
-    style_generator.load_state_dict(state_dict)
-    style_generator.to(device, dtype=weight_type)
-    # Load UniTok
-    codebook_path = hf_hub_download(
-        repo_id=model_path,
-        filename='codebook',
-        token=token,
-    )
-    unitok = UniTok(unitok_config)
-    unitok_state_dict = torch.load(f"{codebook_path}/model.pth", map_location='cpu')
-    unitok.load_state_dict(unitok_state_dict)
-    unitok.to(device, dtype=weight_type)
-    # Load Pipeline (without text encoder initially)
-    pipeline = CoTylePipeline.from_pretrained(
-        model_path,
-        torch_dtype=weight_type,
-        text_encoder=None,
-        processor=None,
-        safety_checker=None,
-        requires_safety_checker=False
-    )
-    # Load Qwen2.5-VL Text-Visual Encoder
-    from transformers import Qwen2_5_VLForConditionalGeneration
-    qwen_text_visual_encoder = Qwen2_5_VLForConditionalGeneration_Quant.from_pretrained(
-    model_path,
-    subfolder='text_encoder',
-    ).to(device, dtype=weight_type)
-    qwen_text_visual_encoder = Qwen2_5_VL_Quant(unitok, qwen_text_visual_encoder)
-    qwen_text_visual_encoder.to(device, dtype=weight_type)
-    pipeline.text_encoder = qwen_text_visual_encoder
-    # Load Processor
-    processor = Qwen2VLProcessor.from_pretrained(
-        model_path,
-        subfolder='processor',
-        min_pixels=64 * 28 * 28,
-        max_pixels=256 * 28 * 28
-    )
-    pipeline.processor = processor
-    pipeline.to(device, dtype=weight_type)
-    pipeline.set_progress_bar_config(disable=True)
-    # Load code frequency
-    with open(f'{model_path}/freq.json', 'r') as f:
-        code_freq = json.load(f)
-    print("✅ All models loaded successfully!")
-def get_final_prompt(dropdown_val, text_val):
-    if dropdown_val == CUSTOM_OPTION:
-        return text_val.strip()
-    return dropdown_val.strip() if dropdown_val else ""
-@spaces.GPU
-def generate_images(style_code: int, seed: int, num_prompts: int, *args):
-    load_models()
-    from models.utils import set_seed
-    prompts = []
-    for i in range(num_prompts):
-        dropdown_val = args[i * 2] if i * 2 < len(args) else ""
-        text_val = args[i * 2 + 1] if i * 2 + 1 < len(args) else ""
-        final_prompt = get_final_prompt(dropdown_val, text_val)
-        if final_prompt:
-            prompts.append(final_prompt)
-    if not prompts:
-        raise gr.Error("Please enter at least one valid prompt!")
-    # Step 1: Generate style codebook tokens
-    set_seed(style_code)
-    style_generator_inputs = {
-        'input_ids': torch.randint(low=0, high=1024, size=(1, 1)).to(device),
-        'attention_mask': torch.ones((1, 1)).to(device),
-    }
-    with torch.no_grad():
-        generated_ids = style_generator.generate(
-            **style_generator_inputs,
-            max_new_tokens=195,
-            temperature=1.0,
-            top_k=200,
-            top_p=0.95,
-            do_sample=True,
-            repetition_penalty=50.0,
-            code_freq=code_freq,
-            code_freq_threshold=90000,
-            k=0.0001,
-        )
-    # Step 2: Generate images
-    placeholder_image = Image.new("RGB", (392, 392), (0, 0, 0))
-    results = []
-    for i, prompt in enumerate(prompts):
-        set_seed(seed)
-        inputs = {
-            "image": [placeholder_image],
-            "prompt": prompt,
-            "generator": torch.Generator(device=device).manual_seed(seed),
-            "true_cfg_scale": 6.0,
-            "negative_prompt": "ugly, monster, grotesque, deformed, mutated, anatomically incorrect, distorted face, disfigured limbs, unnatural posture, blurry, low quality",
-            "num_inference_steps": 40,
-            "guidance_scale": 1.0,
-            "num_images_per_prompt": 1,
-            "codebook_id": generated_ids,
-        }
-        with torch.inference_mode():
-            output = pipeline(**inputs)
-        results.append(output.images[0])
-    return results
-# Gradio Interface
-with gr.Blocks(theme=gr.themes.Soft(), css="""
-    .prompt-hint {
-        font-size: 0.9em;
-        color: #666;
-        margin-top: -8px;
-        margin-bottom: 12px;
-    }
-""") as demo:
-    gr.Markdown(
-        """
-    <div align="center">
-    ## 🎨 CoTyle: Unlocking Code-to-Style Image Generation with Discrete Style Space
-    Enter a `style code` and multiple prompts to generate stylized images.
-    <p align="center">
-    <a href="xxx"><img alt="Project Page" src="https://img.shields.io/badge/Project%20Page-Homepage-yellow"></a>
-    <a href="xxx"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-Code-f8f0f0.svg"></a>
-    <a href="xxx"><img alt="arXiv" src="https://img.shields.io/badge/arXiv-Paper-da282a.svg"></a>
-    <a href="xxxK"><img alt="Hugging Face Demo" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Demo-fd8b02"></a>
-    </p>
-    </div>
-    """
-    )
-    with gr.Row():
-        with gr.Column():
-            style_code = gr.Number(label="Style Code", value=1234567, step=1)
-            num_prompts = gr.Slider(
-                minimum=1,
-                maximum=6,
-                value=4,
-                step=1,
-                label="Number of Prompts (You can choose how many prompt images to generate at once)"
-            )
-            all_dropdowns = []
-            all_texts = []
-            prompt_rows = []
-            with gr.Column():
-                for i in range(6):
-                    with gr.Row(visible=(i < 4)) as row:
-                        choices = [""] + SUGGESTED_PROMPTS + [CUSTOM_OPTION]
-                        dropdown = gr.Dropdown(
-                            choices=choices,
-                            value=SUGGESTED_PROMPTS[i] if i < len(SUGGESTED_PROMPTS) else "",
-                            label=f"Prompt {i+1}",
-                            interactive=True
-                        )
-                        text = gr.Textbox(
-                            label=f"Custom Prompt {i+1}",
-                            lines=2,
-                            visible=False
-                        )
-                        def update_text_visibility(dropdown_val):
-                            return gr.update(visible=(dropdown_val == CUSTOM_OPTION))
-                        dropdown.change(
-                            fn=update_text_visibility,
-                            inputs=dropdown,
-                            outputs=text
-                        )
-                        all_dropdowns.append(dropdown)
-                        all_texts.append(text)
-                        prompt_rows.append(row)
-            seed = gr.Number(label="Seed", value=42, step=1)
-            run_btn = gr.Button("✨ Generate All Images", variant="primary", size="lg")
-        with gr.Column():
-            gallery = gr.Gallery(
-                label="Generated Results",
-                show_label=True,
-                columns=2,
-                rows=2,
-                object_fit="contain",
-                height="auto"
-            )
-    # Update visibility of prompt rows
-    def update_rows_visibility(n):
-        return [gr.update(visible=(i < n)) for i in range(6)]
-    num_prompts.change(
-        fn=update_rows_visibility,
-        inputs=num_prompts,
-        outputs=prompt_rows
-    )
-    # Build input list: [style_code, seed, num_prompts, d1, t1, d2, t2, ...]
-    input_components = [style_code, seed, num_prompts]
-    for d, t in zip(all_dropdowns, all_texts):
-        input_components.extend([d, t])
-    run_btn.click(
-        fn=generate_images,
-        inputs=input_components,
-        outputs=gallery
-    )
-    gr.Markdown("""
-    > **Tips**:
-    > - Adjust the **Number of Prompts** slider to add or remove input rows.
-    > - Select **"✍️ Enter custom prompt..."** to type your own prompts.
-    > - All images share the same `style_code`.
-    """)
-# Launch
-if __name__ == "__main__":
-    import sys
-    sys.path.append(".")
-    demo.queue.launch(debug=True)

app.py CHANGED Viewed

@@ -10,7 +10,8 @@ from PIL import Image
 from huggingface_hub import snapshot_download
 import gc
 import psutil
-import os
 try:
     import pynvml
     pynvml.nvmlInit()
@@ -24,7 +25,7 @@ try:
 except Exception as e:
     print("无法获取 GPU 信息:", e)
-REPO_ID = "Kwai-Kolors/cotyle"
 HF_TOKEN = os.getenv("HF_TOKEN")
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -38,7 +39,71 @@ SUGGESTED_PROMPTS = [
     "The train sped swiftly across a large bridge.",
     "In front of the door stands an apple tree with two apples glistening with dewdrops. A beautiful little bird with vibrant feathers perches on a branch, displaying intricate textures and clear details.",
 ]
-CUSTOM_OPTION = "✍️ Enter custom prompt..."
 def check_memory_usage(tag):
     process = psutil.Process(os.getpid())
@@ -58,18 +123,17 @@ def load_models():
         repo_id=REPO_ID,
         token=HF_TOKEN,
         allow_patterns=[
-            "prior/**",          # 递归下载 prior/ 目录下所有文件
-            "codebook/**",       # 递归下载 codebook/ 目录下所有文件
             "tokenizer/**",
-            "processor/**",      # 递归下载 processor/ 目录下所有文件
-            "text_encoder/**",   # 递归下载 text_encoder/ 目录下所有文件
-            "freq.json",         # 明确指定单个文件（可选，也可用 *.json）
             "processor/**",
             "transformer/**",
             "vae/**",
-            "*.json",            # 所有 .json 文件（包括 config.json 等）
-            "*.pth",             # 所有 .pth 文件
-            "*.safetensors",     # 所有 .safetensors 文件
         ],
         resume_download=True,
     )
@@ -110,7 +174,6 @@ def load_models():
         processor=None,
         safety_checker=None,
         requires_safety_checker=False,
     )
     check_memory_usage('before qwen')
     qwen_text_visual_encoder = Qwen2_5_VLForConditionalGeneration_Quant.from_pretrained(
@@ -135,11 +198,6 @@ def load_models():
         code_freq = json.load(f)
     print('='*10, " All models loaded successfully!")
-def get_final_prompt(dropdown_val, text_val):
-    if dropdown_val == CUSTOM_OPTION:
-        return (text_val or "").strip()
-    return (dropdown_val or "").strip()
 @spaces.GPU
 def generate_images(style_code, seed, num_prompts, *args):
     try:
@@ -154,17 +212,22 @@ def generate_images(style_code, seed, num_prompts, *args):
         num_prompts = int(num_prompts)
     except Exception:
         num_prompts = 1
     load_models()
     from models.utils import set_seed
     prompts = []
     for i in range(num_prompts):
-        dropdown_val = args[i * 2] if i * 2 < len(args) else ""
-        text_val = args[i * 2 + 1] if i * 2 + 1 < len(args) else ""
-        final_prompt = get_final_prompt(dropdown_val, text_val)
-        if final_prompt:
-            prompts.append(final_prompt)
     if not prompts:
         raise gr.Error("Please enter at least one valid prompt!")
     set_seed(style_code)
     style_generator_inputs = {
         "input_ids": torch.randint(low=0, high=1024, size=(1, 1)).to(device),
@@ -187,6 +250,7 @@ def generate_images(style_code, seed, num_prompts, *args):
     placeholder_image = Image.new("RGB", (392, 392), (0, 0, 0))
     results = []
     for i, prompt in enumerate(prompts):
         set_seed(seed)
         inputs = {
             "image": [placeholder_image],
@@ -199,22 +263,57 @@ def generate_images(style_code, seed, num_prompts, *args):
             "num_images_per_prompt": 1,
             "codebook_id": generated_ids,
         }
-        print('='*10, 'before infer')
         with torch.inference_mode():
             output = pipeline(**inputs)
-        print('='*10, 'after inference')
         results.append(output.images[0])
-        # output.images[0].save('tmp.png')
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     gc.collect()
     del output
     return results
 with gr.Blocks(
-    theme=gr.themes.Soft(),
     css="""
     .prompt-hint {
         font-size: 0.9em;
@@ -222,69 +321,120 @@ with gr.Blocks(
         margin-top: -8px;
         margin-bottom: 12px;
     }
 """
 ) as demo:
-    gr.Markdown(
         """
-    <div align="center">
-    ## 🎨 CoTyle: Unlocking Code-to-Style Image Generation with Discrete Style Space
     <div style="display: flex; justify-content: center; gap: 10px; flex-wrap: wrap; margin: 15px 0;">
         <a href="xxx"><img alt="Project Page" src="https://img.shields.io/badge/Project%20Page-Homepage-yellow"></a>
         <a href="xxx"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-Code-f8f0f0.svg"></a>
         <a href="xxx"><img alt="arXiv" src="https://img.shields.io/badge/arXiv-Paper-da282a.svg"></a>
-        <a href="xxxK"><img alt="Hugging Face Demo" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Demo-fd8b02"></a>
     </div>
     </div>
         """
     )
     with gr.Row():
         with gr.Column():
-            style_code = gr.Number(label="Style Code", value=1234567, step=1)
             num_prompts = gr.Slider(
                 minimum=1,
                 maximum=6,
-                value=1,
                 step=1,
                 label="Number of Prompts (You can choose how many prompt images to generate at once)",
             )
-            all_dropdowns = []
-            all_texts = []
-            with gr.Column():
-                for i in range(6):
-                    choices = [""] + SUGGESTED_PROMPTS + [CUSTOM_OPTION]
-                    dropdown = gr.Dropdown(
-                        choices=choices,
-                        value=SUGGESTED_PROMPTS[i] if i < len(SUGGESTED_PROMPTS) else "",
-                        label=f"Prompt {i+1}",
-                        interactive=True,
-                        visible=(i < 1),
-                    )
-                    text = gr.Textbox(
-                        label=f"Custom Prompt {i+1}",
-                        lines=2,
-                        visible=False,
-                    )
-                    def update_text_visibility(dropdown_val):
-                        return gr.update(visible=(dropdown_val == CUSTOM_OPTION))
-                    dropdown.change(
-                        fn=update_text_visibility,
-                        inputs=dropdown,
-                        outputs=text,
-                    )
-                    all_dropdowns.append(dropdown)
-                    all_texts.append(text)
-            seed = gr.Number(label="Seed", value=42, step=1)
             run_btn = gr.Button("✨ Generate All Images", variant="primary", size="lg")
         with gr.Column():
@@ -292,28 +442,32 @@ with gr.Blocks(
                 label="Generated Results",
                 show_label=True,
                 columns=2,
-                rows=2,
                 object_fit="contain",
-                height="auto",
             )
-    def update_components_visibility(n):
-        updates = []
-        for i in range(6):
-            updates.append(gr.update(visible=(i < n)))
-        for i in range(6):
-            updates.append(gr.update(visible=False))
-        return updates
     num_prompts.change(
-        fn=update_components_visibility,
         inputs=num_prompts,
-        outputs=(all_dropdowns + all_texts),
     )
-    input_components = [style_code, seed, num_prompts]
-    for d, t in zip(all_dropdowns, all_texts):
-        input_components.extend([d, t])
     run_btn.click(
         fn=generate_images,
@@ -321,16 +475,102 @@ with gr.Blocks(
         outputs=gallery,
     )
-    gr.Markdown(
-        """
-    > <strong>Tips</strong>:
-    > - Adjust the <strong>Number of Prompts</strong> slider to add or remove input rows.
-    > - Select <strong>"✍️ Enter custom prompt..."</strong> to type your own prompts.
-    > - All images share the same `style_code`.
-    """
-    )
 if __name__ == "__main__":
     load_models()
-    demo.queue().launch(max_threads=1, share=True)

 from huggingface_hub import snapshot_download
 import gc
 import psutil
+from functools import partial
 try:
     import pynvml
     pynvml.nvmlInit()
 except Exception as e:
     print("无法获取 GPU 信息:", e)
+REPO_ID = "Kwai-Kolors/Kolors-CoTyle"
 HF_TOKEN = os.getenv("HF_TOKEN")
 device = "cuda" if torch.cuda.is_available() else "cpu"
     "The train sped swiftly across a large bridge.",
     "In front of the door stands an apple tree with two apples glistening with dewdrops. A beautiful little bird with vibrant feathers perches on a branch, displaying intricate textures and clear details.",
 ]
+# Preset template configuration
+PRESET_TEMPLATES = [
+    {
+        "name": "--sref 1234567",
+        "image_path": "assets/1234567.jpg",
+        "style_code": 1234567,
+        "seed": 42,
+        "prompts": [
+            "An artist sits outdoors, engrossed in their work, brush in hand, capturing the scene with focused intensity. On the canvas, trees and buildings blend seamlessly with the real-world surroundings. Symbols from different cultures, along with animals, plants, and abstract lines, float around them. As the brush touches the canvas, the paint transforms into points of light that scatter, while sheets of paper and flower petals flutter in the air, creating a sense of movement. The atmosphere is a high-detail fusion of art and reality.",
+            "Seagulls soar along the seaside under the setting sun, as a couple in wedding attire holds hands.",
+            "A cute, chubby werewolf holds a balloon and candy, looking adorably mischievous. The background features a full moon on a night sky.",
+            "A classical beauty, dressed in a dreamy, light pink flowing gown with wide sleeves, adorned with countless tiny wind crystals.",
+        ]
+    },
+    {
+        "name": "--sref 666666666",
+        "image_path": "assets/666666666.jpg",
+        "style_code": 666666666,
+        "seed": 42,
+        "prompts": [
+            "A chubby, white, curly-furred baby lamb in anime style, with a pink nose and short mouth, stands on grass looking directly at the camera.",
+            "A boy with a backpack stands on a mountain peak, bathed in sunlight, with continuous mountain ranges in the background.",
+            "Aerial view: distant wind turbines, mountains, a river, heavy snowfall, and four or five people in orange work uniforms and white safety helmets marching in a line through the snow.",
+            "A beautiful Chinese woman in ancient red silk attire rides a white horse, holding a red tassel spear, facing an enemy army of thousands; ethereal clouds swirl around her, and behind her stand countless celestial soldiers clad in white armor; documentary photography style."
+        ]
+    },
+    {
+        "name": "--sref 886",
+        "image_path": "assets/886.jpg",
+        "style_code": 886,
+        "seed": 42,
+        "prompts": [
+            "A lovely crystal snake spirit, slender and nimble, wears an exquisite crystal crown atop its head. Its scales are translucent, shimmering like crystal, its eyes are bright and round, and its expression is lively. Its body coils naturally, its tail gracefully curved, its overall posture harmonious and beautiful.",
+            "Seagulls soar along the seaside under the setting sun, as a couple in wedding attire holds hands.",
+            "A cute, chubby werewolf holds a balloon and candy, looking adorably mischievous. The background features a full moon on a night sky.",
+            "The train sped swiftly across a large bridge."
+        ]
+    },
+    {
+        "name": "--sref 10241024",
+        "image_path": "assets/10241024.jpg",
+        "style_code": 10241024,
+        "seed": 42,
+        "prompts": [
+            "An elegant tabby cat steps gracefully through the doorway, its soft paws landing silently on the floor. Its amber eyes scan the surroundings with keen alertness, taking in every detail of the room.",
+            "Mickey Mouse appears in the 1920s gangster world, dressed in a long trench coat and a fedora, holding an old-fashioned revolver. The backdrop is a dimly lit Chicago alleyway, where shadows stretch across the cobblestones and the air is thick with the intrigue of the era.",
+            "A motorcycle speeds down the highway, the rider clad in black leather, with a biker girl seated behind him. The setting sun glints off the metallic fuel tank, while the rear wheel kicks up a trail of dust. In the background, the desolate road stretches endlessly towards the horizon, framed by the vast wilderness.",
+            "A classical beauty, dressed in a dreamy, light pink flowing gown with wide sleeves, adorned with countless tiny wind crystals."
+        ]
+    },
+    {
+        "name": "--sref 4396",
+        "image_path": "assets/4396.jpg",
+        "style_code": 4396,
+        "seed": 42,
+        "prompts": [
+            "A boy and a girl are walking along the lakeside, surrounded by vibrant flowers, lush grass, and verdant trees.",
+            "A hazy full moon hangs high in the night sky, with the bustling streets of an ancient town below, adorned with a variety of lanterns that are vibrant and bright.",
+            "A cartoon bear with a wide, round mouth and neatly arranged teeth, illustration, mascot, chubby.",
+            "A real-life depiction of a warrior goddess is strikingly beautiful, adorned in metallic armor. She has long legs and sports enormous wings, adding to her majestic presence. A crown sits atop her head, and she wields a weapon, poised in a dynamic battle stance."
+        ]
+    },
+]
 def check_memory_usage(tag):
     process = psutil.Process(os.getpid())
         repo_id=REPO_ID,
         token=HF_TOKEN,
         allow_patterns=[
+            "prior/**",
+            "codebook/**",
             "tokenizer/**",
             "processor/**",
+            "text_encoder/**",
+            "freq.json",
             "transformer/**",
             "vae/**",
+            "*.json",
+            "*.pth",
+            "*.safetensors",
         ],
         resume_download=True,
     )
         processor=None,
         safety_checker=None,
         requires_safety_checker=False,
     )
     check_memory_usage('before qwen')
     qwen_text_visual_encoder = Qwen2_5_VLForConditionalGeneration_Quant.from_pretrained(
         code_freq = json.load(f)
     print('='*10, " All models loaded successfully!")
 @spaces.GPU
 def generate_images(style_code, seed, num_prompts, *args):
     try:
         num_prompts = int(num_prompts)
     except Exception:
         num_prompts = 1
     load_models()
     from models.utils import set_seed
     prompts = []
     for i in range(num_prompts):
+        if i < len(args):
+            prompt_text = (args[i] or "").strip()
+            if prompt_text:
+                prompts.append(prompt_text)
+    print(f"收集到 {len(prompts)} 个有效 prompts")
     if not prompts:
         raise gr.Error("Please enter at least one valid prompt!")
     set_seed(style_code)
     style_generator_inputs = {
         "input_ids": torch.randint(low=0, high=1024, size=(1, 1)).to(device),
     placeholder_image = Image.new("RGB", (392, 392), (0, 0, 0))
     results = []
     for i, prompt in enumerate(prompts):
+        print(f"正在生成第 {i+1}/{len(prompts)} 张图片")
         set_seed(seed)
         inputs = {
             "image": [placeholder_image],
             "num_images_per_prompt": 1,
             "codebook_id": generated_ids,
         }
         with torch.inference_mode():
             output = pipeline(**inputs)
         results.append(output.images[0])
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
     gc.collect()
     del output
+    print(f"成功生成 {len(results)} 张图片")
     return results
+def load_preset_template(template_idx):
+    """Load the preset template at ``template_idx`` and return the values for every component to update.
+
+    The result is a tuple holding, in order: the template's style code, its
+    seed, the literal 4 (presumably the prompt count -- confirm against the
+    handler's outputs), the template's first four prompts, and two empty
+    strings.
+    """
+    template = PRESET_TEMPLATES[template_idx]
+    outputs = [
+        template["style_code"],
+        template["seed"],
+        4,
+    ]
+    for i in range(4):
+        outputs.append(template["prompts"][i])
+    for i in range(2):
+        outputs.append("")
+    return tuple(outputs)  # return a tuple rather than a list (original author's note: slightly faster)
+def create_placeholder_image(text):
+    """Create a placeholder image: a 300x200 light-gray RGB canvas (``text`` is not used by the body)."""
+    return Image.new('RGB', (300, 200), color=(240, 240, 240))
+# Use the Blocks `js` parameter to speed up UI updates
+custom_js = """
+function() {
+    // 优化 Gradio 的更新性能
+    const style = document.createElement('style');
+    style.textContent = `
+        .gradio-container { transition: none !important; }
+        .gr-box { transition: none !important; }
+    `;
+    document.head.appendChild(style);
+}
+"""
 with gr.Blocks(
+    # theme=gr.themes.midnight(),
+    theme = 'Taithrah/Minimal',
+    js=custom_js,  # 添加自定义 JS 来禁用不必要的动画
     css="""
     .prompt-hint {
         font-size: 0.9em;
         margin-top: -8px;
         margin-bottom: 12px;
     }
+    .preset-container {
+        border: 2px solid #e0e0e0;
+        border-radius: 12px;
+        padding: 12px;
+        cursor: pointer;
+        transition: all 0.3s ease;
+        background: white;
+        height: 100%;
+        display: flex;
+        flex-direction: column;
+        max-width: 280px;
+        margin: 0 auto;
+    }
+    .preset-container:hover {
+        border-color: #2196F3;
+        box-shadow: 0 4px 12px rgba(33, 150, 243, 0.2);
+        transform: translateY(-2px);
+    }
+    .preset-image-container {
+        width: 100%;
+        height: 240px;
+        overflow: hidden;
+        border-radius: 8px;
+        margin-bottom: 1px;
+        background: white;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+    }
+    .preset-image-container img {
+        width: 100%;
+        height: 100%;
+        object-fit: cover;
+    }
+    .preset-text {
+        text-align: center;
+        font-weight: bold;
+        font-size: 1.0em;
+        color: #333;
+        padding: 3px 0;
+    }
+    .preset-row {
+        margin-bottom: 10px;
+        justify-content: center;
+        gap: 15px;
+    }
+    .preset-section {
+        max-width: 1900px;
+        margin: 0 auto;
+        padding: 0 20px;
+    }
+    /* 禁用不必要的过渡动画以加速 */
+    .gr-box, .gr-form, .gr-input {
+        transition: none !important;
+    }
 """
 ) as demo:
+    gr.HTML(
         """
+    <div align="center" style="font-size: 40px;">
+    🎨 CoTyle: Unlocking Code-to-Style Image Generation with Discrete Style Space
     <div style="display: flex; justify-content: center; gap: 10px; flex-wrap: wrap; margin: 15px 0;">
         <a href="xxx"><img alt="Project Page" src="https://img.shields.io/badge/Project%20Page-Homepage-yellow"></a>
         <a href="xxx"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-Code-f8f0f0.svg"></a>
         <a href="xxx"><img alt="arXiv" src="https://img.shields.io/badge/arXiv-Paper-da282a.svg"></a>
+        <a href="https://huggingface.co/spaces/Kwai-Kolors/CoTyle"><img alt="Hugging Face Demo" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Demo-fd8b02"></a>
     </div>
     </div>
         """
     )
     with gr.Row():
         with gr.Column():
+            # style_code = gr.Number(label="Style Code", value=1234567, step=1)
+            style_code = gr.Slider(
+                minimum=1,
+                maximum=4294967296,
+                value=1234567,
+                step=1,
+                label="Style Code",
+            )
             num_prompts = gr.Slider(
                 minimum=1,
                 maximum=6,
+                value=4,
                 step=1,
                 label="Number of Prompts (You can choose how many prompt images to generate at once)",
             )
+            text_inputs = []
+            for i in range(6):
+                default_prompt = SUGGESTED_PROMPTS[i] if i < len(SUGGESTED_PROMPTS) else ""
+                textbox = gr.Textbox(
+                    value=default_prompt,
+                    label=f"Prompt {i+1}",
+                    lines=3,
+                    max_lines=10,
+                    placeholder="Enter your prompt here...",
+                    visible=(i < 4),
+                )
+                text_inputs.append(textbox)
+            # seed = gr.Number(label="Seed", value=42, step=1)
+            seed = gr.Slider(
+                minimum=1,
+                maximum=4294967296,
+                value=42,
+                step=1,
+                label="Seed",
+            )
             run_btn = gr.Button("✨ Generate All Images", variant="primary", size="lg")
         with gr.Column():
                 label="Generated Results",
                 show_label=True,
                 columns=2,
                 object_fit="contain",
+                height="100%",
             )
+            gr.Markdown(
+                """
+            > <strong>Tips</strong>:
+            > - Adjust the <strong>Number of Prompts</strong> slider to add or remove input rows.
+            > - Type your own prompts directly in the text boxes.
+            > - All images share the same style_code.
+            > - You can click any template below to quickly load preset style code and prompts.
+            """
+            )
+    # 优化的可见性更新函数
+    def update_textboxes_visibility(n):
+        # 使用列表推导式，更快
+        return [gr.update(visible=(i < n)) for i in range(6)]
+    # Use queue=False to speed up operations that do not need queuing
     num_prompts.change(
+        fn=update_textboxes_visibility,
         inputs=num_prompts,
+        outputs=text_inputs,
+        queue=False,  # 关键：禁用队列以加速
     )
+    input_components = [style_code, seed, num_prompts] + text_inputs
     run_btn.click(
         fn=generate_images,
         outputs=gallery,
     )
+    # Build the list of output components once, up front (outside the loop)
+    output_components = [style_code, seed, num_prompts] + text_inputs
+    # Preset template section
+    with gr.Column(elem_classes="preset-section"):
+        gr.Markdown("## 🎯 Examples")
+        gr.Markdown("Click any example below to quickly load preset style code, seed, and prompts")
+        # Single row holding the first five presets
+        with gr.Row(elem_classes="preset-row"):
+            for i in range(5):
+                with gr.Column(scale=1, min_width=250):
+                    template = PRESET_TEMPLATES[i]
+                    with gr.Column(elem_classes="preset-container"):
+                        if os.path.exists(template["image_path"]):
+                            preset_img = gr.Image(
+                                value=template["image_path"],
+                                show_label=False,
+                                interactive=False,
+                                container=False,
+                                height=280,
+                                elem_classes="preset-image-container"
+                            )
+                        else:
+                            placeholder = create_placeholder_image(template["name"])
+                            preset_img = gr.Image(
+                                value=placeholder,
+                                show_label=False,
+                                interactive=False,
+                                container=False,
+                                height=280,
+                                elem_classes="preset-image-container"
+                            )
+                        preset_btn = gr.Button(
+                            value=template["name"],
+                            variant="secondary",
+                            size="lg"
+                        )
+                        # Use partial and queue=False for speed
+                        preset_btn.click(
+                            fn=partial(load_preset_template, i),
+                            inputs=None,
+                            outputs=output_components,
+                            queue=False,  # 关键：禁用队列
+                        )
+        # # 第二行3个预设
+        # with gr.Row(elem_classes="preset-row"):
+        #     for i in range(3, 6):
+        #         with gr.Column(scale=1, min_width=250):
+        #             template = PRESET_TEMPLATES[i]
+        #             with gr.Column(elem_classes="preset-container"):
+        #                 if os.path.exists(template["image_path"]):
+        #                     preset_img = gr.Image(
+        #                         value=template["image_path"],
+        #                         show_label=False,
+        #                         interactive=False,
+        #                         container=False,
+        #                         height=280,
+        #                         elem_classes="preset-image-container"
+        #                     )
+        #                 else:
+        #                     placeholder = create_placeholder_image(template["name"])
+        #                     preset_img = gr.Image(
+        #                         value=placeholder,
+        #                         show_label=False,
+        #                         interactive=False,
+        #                         container=False,
+        #                         height=280,
+        #                         elem_classes="preset-image-container"
+        #                     )
+        #                 preset_btn = gr.Button(
+        #                     value=template["name"],
+        #                     variant="secondary",
+        #                     size="lg"
+        #                 )
+        #                 # 使用 partial 和 queue=False 加速
+        #                 preset_btn.click(
+        #                     fn=partial(load_preset_template, i),
+        #                     inputs=None,
+        #                     outputs=output_components,
+        #                     queue=False,  # 关键：禁用队列
+        #                 )
 if __name__ == "__main__":
     load_models()
+    # 调整 queue 参数以优化性能
+    demo.queue(
+        max_size=20,  # 减小队列大小
+        default_concurrency_limit=1
+    ).launch(
+        max_threads=1,
+        share=True
+    )

assets/10241024.jpg ADDED Viewed

Git LFS Details

SHA256: 8de8dec69d93d9b6092f68847249ce0c9e5c3257a71bd96ed6b36bbb53aadca7
Pointer size: 131 Bytes
Size of remote file: 584 kB

assets/1234567.jpg ADDED Viewed

Git LFS Details

SHA256: f4e227b8d193e7f4ec91e5c9ed3bdb587a098b615e8a3dca96754b3fde06398f
Pointer size: 131 Bytes
Size of remote file: 414 kB

assets/4396.jpg ADDED Viewed

Git LFS Details

SHA256: 5f05e335b46ef9a405279e123f00b02e6100e2f5999457cef1599ba134c6c9e3
Pointer size: 131 Bytes
Size of remote file: 757 kB

assets/666666666.jpg ADDED Viewed

Git LFS Details

SHA256: f3641e3651354992bb7889fd8552db5d5a4b9e1b4048c38e2acfebdbf2781023
Pointer size: 131 Bytes
Size of remote file: 653 kB

assets/886.jpg ADDED Viewed

Git LFS Details

SHA256: 6f75b102f5c6afcf6691d846a6033b431b69a2b1cf3724a55fee2967cb18448f
Pointer size: 131 Bytes
Size of remote file: 630 kB

models/__pycache__/model.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/model.cpython-310.pyc and b/models/__pycache__/model.cpython-310.pyc differ

models/__pycache__/pipe.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/pipe.cpython-310.pyc and b/models/__pycache__/pipe.cpython-310.pyc differ

models/__pycache__/quant.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/quant.cpython-310.pyc and b/models/__pycache__/quant.cpython-310.pyc differ

models/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/utils.cpython-310.pyc and b/models/__pycache__/utils.cpython-310.pyc differ

models/__pycache__/vitamin.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/vitamin.cpython-310.pyc and b/models/__pycache__/vitamin.cpython-310.pyc differ

models/__pycache__/vlm_unitok.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/vlm_unitok.cpython-310.pyc and b/models/__pycache__/vlm_unitok.cpython-310.pyc differ

models/__pycache__/vqvae.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/vqvae.cpython-310.pyc and b/models/__pycache__/vqvae.cpython-310.pyc differ