aiqtech committed
Commit 1f5cf77 · verified · 1 Parent(s): 9f57959

Update app.py

Files changed (1)
  1. app.py +42 -72
app.py CHANGED
@@ -21,15 +21,10 @@ TMP_DIR = "/tmp/Trellis-demo"
 os.makedirs(TMP_DIR, exist_ok=True)
 
 
-# Memory-related environment variables
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:32'
-os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
-os.environ['TORCH_HOME'] = '/tmp/torch_home'
-os.environ['HF_HOME'] = '/tmp/huggingface'
-os.environ['XDG_CACHE_HOME'] = '/tmp/cache'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-os.environ['SPCONV_ALGO'] = 'native'
-os.environ['WARP_USE_CPU'] = '1'
+# GPU memory-related environment variables (updated)
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'  # increased for the A100
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # use a single GPU
+os.environ['CUDA_LAUNCH_BLOCKING'] = '0'  # allow asynchronous execution on the A100
 
 def initialize_models():
     global pipeline, translator, flux_pipe
@@ -37,33 +32,27 @@ def initialize_models():
     try:
         import torch
 
-        # Memory settings
-        torch.backends.cudnn.benchmark = False
-        torch.backends.cudnn.deterministic = True
+        # A100 optimization settings
+        torch.backends.cudnn.benchmark = True  # enabled for better performance on the A100
+        torch.backends.cuda.matmul.allow_tf32 = True  # allow TF32
+        torch.backends.cudnn.allow_tf32 = True
 
         print("Initializing Trellis pipeline...")
-        # Initialize the Trellis pipeline
         pipeline = TrellisImageTo3DPipeline.from_pretrained(
-            "JeffreyXiang/TRELLIS-image-large"
+            "JeffreyXiang/TRELLIS-image-large",
+            torch_dtype=torch.float16  # use FP16 on the A100
         )
 
-        if pipeline is None:
-            raise Exception("Failed to initialize Trellis pipeline")
+        if torch.cuda.is_available():
+            pipeline = pipeline.to("cuda")
 
         print("Initializing translator...")
-        # Initialize the translator
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
-            device="cpu"
+            device="cuda"  # run the translator on the GPU as well
         )
 
-        if translator is None:
-            raise Exception("Failed to initialize translator")
-
-        # The Flux pipeline is initialized later
-        flux_pipe = None
-
         print("Models initialized successfully")
         return True
 
@@ -79,15 +68,17 @@ def get_flux_pipe():
         free_memory()
         flux_pipe = FluxPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
-            torch_dtype=torch.float32,  # start in CPU mode
+            torch_dtype=torch.float16,  # use FP16 on the A100
             use_safetensors=True
-        )
+        ).to("cuda")
     except Exception as e:
         print(f"Error loading Flux pipeline: {e}")
         return None
     return flux_pipe
 
 
+
+
 def free_memory():
     """Enhanced memory cleanup function"""
     import gc
@@ -265,7 +256,7 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
     try:
-        free_memory()  # free memory before starting
+        free_memory()
 
         # Get the Flux pipeline
         flux_pipe = get_flux_pipe()
@@ -273,25 +264,27 @@ def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
             raise Exception("Failed to load Flux pipeline")
 
         # Limit image size
-        height = min(height, 512)
-        width = min(width, 512)
+        height = min(height, 1024)  # allow larger images on the A100
+        width = min(width, 1024)
 
         # Process the prompt
        base_prompt = "wbgmsst, 3D, white background"
         translated_prompt = translate_if_korean(prompt)
         final_prompt = f"{translated_prompt}, {base_prompt}"
 
-        with torch.inference_mode(), torch.cuda.amp.autocast():
+        with torch.cuda.amp.autocast():  # automatic mixed precision on the A100
             output = flux_pipe(
                 prompt=[final_prompt],
                 height=height,
                 width=width,
-                guidance_scale=min(guidance_scale, 7.5),  # capped at a lower value
-                num_inference_steps=min(num_steps, 20)  # limit the step count
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_steps,
+                generator=torch.Generator(device='cuda')
             )
-            image = output.images[0]
+
+        image = output.images[0]
 
-        free_memory()  # free memory after completion
+        free_memory()
         return image
 
     except Exception as e:
@@ -444,50 +437,27 @@ if __name__ == "__main__":
     import warnings
     warnings.filterwarnings('ignore')
 
+    # Check the CUDA configuration
+    if torch.cuda.is_available():
+        print(f"Using GPU: {torch.cuda.get_device_name()}")
+        print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+
     # Create the directory
     os.makedirs(TMP_DIR, exist_ok=True)
 
     # Free memory
     free_memory()
 
-    # Attempt model initialization
-    retry_count = 3
-    initialized = False
-
-    for i in range(retry_count):
-        try:
-            if initialize_models():
-                initialized = True
-                break
-            else:
-                print(f"Initialization attempt {i+1} failed, retrying...")
-                free_memory()
-        except Exception as e:
-            print(f"Error during initialization attempt {i+1}: {str(e)}")
-            free_memory()
-
-    if not initialized:
-        print("Failed to initialize models after multiple attempts")
+    # Initialize models
+    if not initialize_models():
+        print("Failed to initialize models")
         exit(1)
 
-    try:
-        # Try preloading rembg
-        test_image = Image.fromarray(np.ones((32, 32, 3), dtype=np.uint8) * 255)
-        if pipeline is not None:
-            pipeline.preprocess_image(test_image)
-    except Exception as e:
-        print(f"Warning: Failed to preload rembg: {str(e)}")
-
     # Launch the Gradio app
-    try:
-        demo.queue(max_size=1).launch(
-            share=True,
-            max_threads=1,
-            show_error=True,
-            server_port=7860,
-            server_name="0.0.0.0",
-            quiet=True
-        )
-    except Exception as e:
-        print(f"Error launching Gradio app: {str(e)}")
-        exit(1)
+    demo.queue(max_size=2).launch(  # increased queue size
+        share=True,
+        max_threads=4,  # increased thread count
+        show_error=True,
+        server_port=7860,
+        server_name="0.0.0.0"
+    )
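
A note on the allocator change in the first hunk: PYTORCH_CUDA_ALLOC_CONF is only read when PyTorch initializes its CUDA caching allocator, so it must be in the environment before the first CUDA call, which is why app.py sets it at the top of the module. A minimal sketch of the safe ordering (the tensor and print at the end are illustrative, not app code):

import os

# Allocator options are read when PyTorch first initializes CUDA,
# so they must be set before any CUDA work happens.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'

import torch  # imported after the env var is set

if torch.cuda.is_available():
    x = torch.zeros(1, device='cuda')  # allocator initializes on first CUDA use
    print(torch.cuda.memory_allocated())  # nonzero once the tensor is allocated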
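The TF32 switches enabled in initialize_models() are process-wide flags rather than per-model settings. A self-contained sketch of their effect on an Ampere-class GPU such as the A100 (the matmul below is a stand-in, not app code):

import torch

# TF32 keeps FP32 dynamic range but rounds the mantissa to 10 bits in tensor cores,
# speeding up matmuls and convolutions on Ampere GPUs at a small precision cost.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True  # let cuDNN autotune kernels for repeated shapes

if torch.cuda.is_available():
    a = torch.randn(1024, 1024, device='cuda')
    b = a @ a  # routed through TF32 tensor cores while the flag is on
    print(b.dtype)  # still torch.float32; TF32 is an internal compute format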
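Since both pipelines are now loaded with torch_dtype=torch.float16, the autocast context in generate_image_from_text acts mainly as a safety net for any FP32 submodules. For reference, a sketch of the equivalent pattern; recent PyTorch releases prefer torch.amp.autocast over the torch.cuda.amp.autocast spelling, and pairing it with inference_mode avoids autograd bookkeeping during generation:

import torch

if torch.cuda.is_available():
    w = torch.randn(512, 512, device='cuda')
    with torch.inference_mode(), torch.amp.autocast('cuda', dtype=torch.float16):
        y = w @ w  # the matmul runs in FP16 inside the context
    print(y.dtype)  # torch.float16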
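Finally, a runnable sketch of the queue and launch configuration used at the end of app.py; the one-line Blocks UI is a hypothetical stand-in for the real interface:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")  # stand-in for the actual app layout

# queue(max_size=2) bounds the number of pending requests;
# max_threads=4 raises how many requests Gradio processes concurrently.
demo.queue(max_size=2).launch(
    server_name="0.0.0.0",  # listen on all interfaces, as inside a Space container
    server_port=7860,
    max_threads=4,
    show_error=True,
)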