SORA-3D

Paused

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

23a34ba

verified ·

1 Parent(s): b3a304c

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -360

app.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import gradio as gr
 import spaces
 from gradio_litmodel3d import LitModel3D
 import os
 import torch
 import numpy as np
 import imageio
@@ -11,162 +14,28 @@ from PIL import Image
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
-from transformers import pipeline as translation_pipeline
-from diffusers import FluxPipeline
-from typing import *
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = "/tmp/Trellis-demo"
-os.makedirs(TMP_DIR, exist_ok=True)
-# GPU 메모리 관련 환경 변수
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # 더 작은 값으로 설정
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
-os.environ['CUDA_CACHE_DISABLE'] = '1'
-def initialize_models():
-    global pipeline, translator, flux_pipe
-    try:
-        # CUDA 설정
-        if torch.cuda.is_available():
-            torch.backends.cudnn.benchmark = True
-            torch.backends.cuda.matmul.allow_tf32 = True
-            torch.backends.cudnn.allow_tf32 = True
-        print("Initializing Trellis pipeline...")
-        try:
-            pipeline = TrellisImageTo3DPipeline.from_pretrained(
-                "JeffreyXiang/TRELLIS-image-large"
-            )
-            if pipeline is None:
-                raise ValueError("Pipeline initialization returned None")
-            if torch.cuda.is_available():
-                pipeline = pipeline.to("cuda")
-                # Half precision으로 변환
-                pipeline = pipeline.half()
-        except Exception as e:
-            print(f"Error initializing Trellis pipeline: {str(e)}")
-            raise
-        print("Initializing translator...")
-        try:
-            translator = translation_pipeline(
-                "translation",
-                model="Helsinki-NLP/opus-mt-ko-en",
-                device=0 if torch.cuda.is_available() else -1
-            )
-        except Exception as e:
-            print(f"Error initializing translator: {str(e)}")
-            raise
-        flux_pipe = None
-        print("Models initialized successfully")
-        return True
-    except Exception as e:
-        print(f"Model initialization error: {str(e)}")
-        free_memory()
-        return False
-def get_flux_pipe():
-    """Flux 파이프라인을 필요할 때만 로드하는 함수"""
-    global flux_pipe
-    if flux_pipe is None:
-        try:
-            free_memory()
-            flux_pipe = FluxPipeline.from_pretrained(
-                "black-forest-labs/FLUX.1-dev",
-                use_safetensors=True
-            )
-            if torch.cuda.is_available():
-                flux_pipe = flux_pipe.to("cuda")
-                flux_pipe.enable_model_cpu_offload()  # CPU 오프로딩 활성화
-        except Exception as e:
-            print(f"Error loading Flux pipeline: {e}")
-            return None
-    return flux_pipe
-def free_memory():
-    """강화된 메모리 정리 함수"""
-    import gc
-    import os
-    # Python 가비지 컬렉션
-    gc.collect()
-    # CUDA 메모리 정리
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.synchronize()
-    # 임시 파일 정리
-    tmp_dirs = ['/tmp/transformers_cache', '/tmp/torch_home',
-                '/tmp/huggingface', '/tmp/cache', TMP_DIR]
-    for dir_path in tmp_dirs:
-        if os.path.exists(dir_path):
-            try:
-                for file in os.listdir(dir_path):
-                    file_path = os.path.join(dir_path, file)
-                    if os.path.isfile(file_path):
-                        try:
-                            os.unlink(file_path)
-                        except:
-                            pass
-            except:
-                pass
-def setup_gpu_model(model):
-    """GPU 설정이 필요한 모델을 처리하는 함수"""
-    if torch.cuda.is_available():
-        model = model.to("cuda")
-    return model
-def translate_if_korean(text):
-    if any(ord('가') <= ord(char) <= ord('힣') for char in text):
-        translated = translator(text)[0]['translation_text']
-        return translated
-    return text
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
-    if image is None:
-        return None, None
-    try:
-        trial_id = str(uuid.uuid4())
-        # 이미지 크기 제한
-        max_size = 768
-        if max(image.size) > max_size:
-            ratio = max_size / max(image.size)
-            new_size = tuple(int(dim * ratio) for dim in image.size)
-            image = image.resize(new_size, Image.LANCZOS)
-        # 이미지 전처리
-        processed_image = pipeline.preprocess_image(image)
-        if processed_image is None:
-            raise Exception("Failed to process image")
-        # 임시 파일 저장
-        save_path = os.path.join(TMP_DIR, f"{trial_id}.png")
-        processed_image.save(save_path)
-        return trial_id, processed_image
-    except Exception as e:
-        print(f"Error in preprocess_image: {str(e)}")
-        return None, None
 def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
     return {
@@ -184,7 +53,8 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
         },
         'trial_id': trial_id,
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
@@ -207,190 +77,116 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh, state['trial_id']
-def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
-                ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
-    try:
-        if randomize_seed:
-            seed = np.random.randint(0, MAX_SEED)
-        input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
-        # L40S에 맞게 이미지 크기 제한 조정
-        max_size = 768
-        if max(input_image.size) > max_size:
-            ratio = max_size / max(input_image.size)
-            input_image = input_image.resize(
-                (int(input_image.size[0] * ratio),
-                 int(input_image.size[1] * ratio)),
-                Image.LANCZOS
-            )
-        if torch.cuda.is_available():
-            pipeline.to("cuda")
-        try:
-            outputs = pipeline.run(
-                input_image,
-                seed=seed,
-                formats=["gaussian", "mesh"],
-                preprocess_image=False,
-                sparse_structure_sampler_params={
-                    "steps": min(ss_sampling_steps, 20),
-                    "cfg_strength": ss_guidance_strength,
-                },
-                slat_sampler_params={
-                    "steps": min(slat_sampling_steps, 20),
-                    "cfg_strength": slat_guidance_strength,
-                }
-            )
-        except RuntimeError as e:
-            print(f"Runtime error in pipeline.run: {str(e)}")
-            free_memory()
-            raise e
-        # 비디오 생성
-        video = render_utils.render_video(outputs['gaussian'][0], num_frames=40)['color']
-        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=40)['normal']
-        video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-        trial_id = str(uuid.uuid4())
-        video_path = f"{TMP_DIR}/{trial_id}.mp4"
-        os.makedirs(os.path.dirname(video_path), exist_ok=True)
-        imageio.mimsave(video_path, video, fps=20)
-        state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
-        if torch.cuda.is_available():
-            pipeline.to("cpu")
-        return state, video_path
-    except Exception as e:
-        print(f"Error in image_to_3d: {str(e)}")
-        if torch.cuda.is_available():
-            pipeline.to("cpu")
-        raise e
-def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
-    try:
-        free_memory()
-        flux_pipe = get_flux_pipe()
-        if flux_pipe is None:
-            raise Exception("Failed to load Flux pipeline")
-        # L40S에 맞게 크기 제한 조정
-        height = min(height, 1024)
-        width = min(width, 1024)
-        translated_prompt = translate_if_korean(prompt)
-        final_prompt = f"{translated_prompt}, wbgmsst, 3D, white background"
-        with torch.cuda.amp.autocast():
-            output = flux_pipe(
-                prompt=[final_prompt],
-                height=height,
-                width=width,
-                guidance_scale=guidance_scale,
-                num_inference_steps=num_steps,
-                generator=torch.Generator(device='cuda')
-            )
-        image = output.images[0]
-        free_memory()
-        return image
-    except Exception as e:
-        print(f"Error in generate_image_from_text: {str(e)}")
-        free_memory()
-        raise e
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
     glb_path = f"{TMP_DIR}/{trial_id}.glb"
     glb.export(glb_path)
     return glb_path, glb_path
 def activate_button() -> gr.Button:
     return gr.Button(interactive=True)
 def deactivate_button() -> gr.Button:
     return gr.Button(interactive=False)
-css = """
-footer {
-    visibility: hidden;
-}
-"""
-# Gradio 인터페이스 정의
-with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     gr.Markdown("""
-    # Craft3D : 3D Asset Creation & Text-to-Image Generation
     """)
-    with gr.Tabs():
-        with gr.TabItem("Image to 3D"):
-            with gr.Row():
-                with gr.Column():
-                    image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)
-                    with gr.Accordion(label="Generation Settings", open=False):
-                        seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
-                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                        gr.Markdown("Stage 1: Sparse Structure Generation")
-                        with gr.Row():
-                            ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
-                            ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-                        gr.Markdown("Stage 2: Structured Latent Generation")
-                        with gr.Row():
-                            slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
-                            slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-                    generate_btn = gr.Button("Generate")
-                    with gr.Accordion(label="GLB Extraction Settings", open=False):
-                        mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
-                        texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
-                    extract_glb_btn = gr.Button("Extract GLB", interactive=False)
-                with gr.Column():
-                    video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
-                    model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
-                    download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
-        with gr.TabItem("Text to Image"):
-            with gr.Row():
-                with gr.Column():
-                    text_prompt = gr.Textbox(
-                        label="Text Prompt",
-                        placeholder="Enter your image description...",
-                        lines=3
-                    )
-                    with gr.Row():
-                        txt2img_height = gr.Slider(256, 1024, value=512, step=64, label="Height")
-                        txt2img_width = gr.Slider(256, 1024, value=512, step=64, label="Width")
-                    with gr.Row():
-                        guidance_scale = gr.Slider(1.0, 20.0, value=7.5, label="Guidance Scale")
-                        num_steps = gr.Slider(1, 50, value=20, label="Number of Steps")
-                    generate_txt2img_btn = gr.Button("Generate Image")
-                with gr.Column():
-                    txt2img_output = gr.Image(label="Generated Image")
     trial_id = gr.Textbox(visible=False)
     output_buf = gr.State()
-    # Example images
     with gr.Row():
         examples = gr.Examples(
             examples=[
@@ -401,11 +197,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
             fn=preprocess_image,
             outputs=[trial_id, image_prompt],
             run_on_click=True,
-            examples_per_page=32,  # 예제 수 감소
-            cache_examples=False  # 예제 캐싱 비활성화는 Examples 컴포넌트에서 설정
         )
     # Handlers
     image_prompt.upload(
@@ -413,7 +206,6 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
         inputs=[image_prompt],
         outputs=[trial_id, image_prompt],
     )
     image_prompt.clear(
         lambda: '',
         outputs=[trial_id],
@@ -421,62 +213,39 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     generate_btn.click(
         image_to_3d,
-        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps,
-                slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
-        concurrency_limit=1
     ).then(
         activate_button,
-        outputs=[extract_glb_btn]
     )
     extract_glb_btn.click(
         extract_glb,
         inputs=[output_buf, mesh_simplify, texture_size],
         outputs=[model_output, download_glb],
-        concurrency_limit=1
     ).then(
         activate_button,
-        outputs=[download_glb]
-    )
-    generate_txt2img_btn.click(
-        generate_image_from_text,
-        inputs=[text_prompt, txt2img_height, txt2img_width, guidance_scale, num_steps],
-        outputs=[txt2img_output],
-        concurrency_limit=1,
-        show_progress=True  # 진행 상황 표시
     )
 if __name__ == "__main__":
-    import warnings
-    warnings.filterwarnings('ignore')
-    # CUDA 설정 확인
-    if torch.cuda.is_available():
-        print(f"Using GPU: {torch.cuda.get_device_name()}")
-        print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
-        # CUDA 메모리 설정
-        torch.cuda.set_per_process_memory_fraction(0.8)  # GPU 메모리 사용량 제한
-    # 디렉토리 생성
-    os.makedirs(TMP_DIR, exist_ok=True)
-    # 메모리 정리
-    free_memory()
-    # 모델 초기화
-    if not initialize_models():
-        print("Failed to initialize models")
-        exit(1)
-    # Gradio 앱 실행
-    demo.queue(max_size=1).launch(
-        share=True,
-        max_threads=2,
-        show_error=True,
-        server_port=7860,
-        server_name="0.0.0.0",
-        enable_queue=True
-    )

 import gradio as gr
 import spaces
 from gradio_litmodel3d import LitModel3D
 import os
+os.environ['SPCONV_ALGO'] = 'native'
+from typing import *
 import torch
 import numpy as np
 import imageio
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = "/tmp/Trellis-demo"
+os.makedirs(TMP_DIR, exist_ok=True)
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
+    """
+    Preprocess the input image.
+    Args:
+        image (Image.Image): The input image.
+    Returns:
+        str: uuid of the trial.
+        Image.Image: The preprocessed image.
+    """
+    trial_id = str(uuid.uuid4())
+    processed_image = pipeline.preprocess_image(image)
+    processed_image.save(f"{TMP_DIR}/{trial_id}.png")
+    return trial_id, processed_image
 def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
     return {
         },
         'trial_id': trial_id,
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
     return gs, mesh, state['trial_id']
+@spaces.GPU
+def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
+    """
+    Convert an image to a 3D model.
+    Args:
+        trial_id (str): The uuid of the trial.
+        seed (int): The random seed.
+        randomize_seed (bool): Whether to randomize the seed.
+        ss_guidance_strength (float): The guidance strength for sparse structure generation.
+        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
+        slat_guidance_strength (float): The guidance strength for structured latent generation.
+        slat_sampling_steps (int): The number of sampling steps for structured latent generation.
+    Returns:
+        dict: The information of the generated 3D model.
+        str: The path to the video of the 3D model.
+    """
+    if randomize_seed:
+        seed = np.random.randint(0, MAX_SEED)
+    outputs = pipeline.run(
+        Image.open(f"{TMP_DIR}/{trial_id}.png"),
+        seed=seed,
+        formats=["gaussian", "mesh"],
+        preprocess_image=False,
+        sparse_structure_sampler_params={
+            "steps": ss_sampling_steps,
+            "cfg_strength": ss_guidance_strength,
+        },
+        slat_sampler_params={
+            "steps": slat_sampling_steps,
+            "cfg_strength": slat_guidance_strength,
+        },
+    )
+    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+    trial_id = uuid.uuid4()
+    video_path = f"{TMP_DIR}/{trial_id}.mp4"
+    os.makedirs(os.path.dirname(video_path), exist_ok=True)
+    imageio.mimsave(video_path, video, fps=15)
+    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
+    return state, video_path
+@spaces.GPU
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
+    """
+    Extract a GLB file from the 3D model.
+    Args:
+        state (dict): The state of the generated 3D model.
+        mesh_simplify (float): The mesh simplification factor.
+        texture_size (int): The texture resolution.
+    Returns:
+        str: The path to the extracted GLB file.
+    """
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
     glb_path = f"{TMP_DIR}/{trial_id}.glb"
     glb.export(glb_path)
     return glb_path, glb_path
 def activate_button() -> gr.Button:
     return gr.Button(interactive=True)
 def deactivate_button() -> gr.Button:
     return gr.Button(interactive=False)
+with gr.Blocks() as demo:
     gr.Markdown("""
+    ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
+    * Upload an image and click "Generate" to create a 3D asset. If the image has alpha channel, it be used as the mask. Otherwise, we use `rembg` to remove the background.
+    * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
     """)
+    with gr.Row():
+        with gr.Column():
+            image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)
+            with gr.Accordion(label="Generation Settings", open=False):
+                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                gr.Markdown("Stage 1: Sparse Structure Generation")
+                with gr.Row():
+                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                gr.Markdown("Stage 2: Structured Latent Generation")
+                with gr.Row():
+                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+            generate_btn = gr.Button("Generate")
+            with gr.Accordion(label="GLB Extraction Settings", open=False):
+                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+            extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+        with gr.Column():
+            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+            model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
+            download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
     trial_id = gr.Textbox(visible=False)
     output_buf = gr.State()
+    # Example images at the bottom of the page
     with gr.Row():
         examples = gr.Examples(
             examples=[
             fn=preprocess_image,
             outputs=[trial_id, image_prompt],
             run_on_click=True,
+            examples_per_page=64,
         )
     # Handlers
     image_prompt.upload(
         inputs=[image_prompt],
         outputs=[trial_id, image_prompt],
     )
     image_prompt.clear(
         lambda: '',
         outputs=[trial_id],
     generate_btn.click(
         image_to_3d,
+        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
     ).then(
         activate_button,
+        outputs=[extract_glb_btn],
+    )
+    video_output.clear(
+        deactivate_button,
+        outputs=[extract_glb_btn],
     )
     extract_glb_btn.click(
         extract_glb,
         inputs=[output_buf, mesh_simplify, texture_size],
         outputs=[model_output, download_glb],
     ).then(
         activate_button,
+        outputs=[download_glb],
     )
+    model_output.clear(
+        deactivate_button,
+        outputs=[download_glb],
+    )
+# Launch the Gradio app
 if __name__ == "__main__":
+    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+    pipeline.cuda()
+    try:
+        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
+    except:
+        pass
+    demo.launch()