frankleeeee commited on
Commit
a097e62
1 Parent(s): 348ea80
Files changed (1) hide show
  1. app.py +28 -7
app.py CHANGED
@@ -14,6 +14,7 @@ import sys
14
  import spaces
15
  import gradio as gr
16
  import torch
 
17
 
18
 
19
 
@@ -29,7 +30,7 @@ HF_STDIT_MAP = {
29
  "v1-HQ-16x512x512": "hpcai-tech/OpenSora-STDiT-v1-HQ-16x512x512",
30
  }
31
 
32
- def install_dependencies():
33
  """
34
  Install the required dependencies for the demo if they are not already installed.
35
  """
@@ -41,7 +42,9 @@ def install_dependencies():
41
  except (ImportError, ModuleNotFoundError):
42
  return False
43
 
44
- # install flash attention
 
 
45
  if not _is_package_available("flash_attn"):
46
  subprocess.run(
47
  f"{sys.executable} -m pip install flash-attn --no-build-isolation",
@@ -49,6 +52,25 @@ def install_dependencies():
49
  shell=True,
50
  )
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def read_config(config_path):
53
  """
54
  Read the configuration file.
@@ -114,6 +136,7 @@ def parse_args():
114
  parser.add_argument("--port", default=None, type=int, help="The port to run the Gradio App on.")
115
  parser.add_argument("--host", default=None, type=str, help="The host to run the Gradio App on.")
116
  parser.add_argument("--share", action="store_true", help="Whether to share this gradio demo.")
 
117
  return parser.parse_args()
118
 
119
 
@@ -128,11 +151,11 @@ config = read_config(CONFIG_MAP[args.model_type])
128
  os.makedirs(args.output, exist_ok=True)
129
 
130
  # disable torch jit as it can cause failure in gradio SDK
131
- # since gradio sdk uses torch with cuda 11.3
132
  torch.jit._state.disable()
133
 
134
  # set up
135
- install_dependencies()
136
 
137
  # build model
138
  vae, text_encoder, stdit, scheduler = build_models(args.model_type, config)
@@ -141,7 +164,6 @@ vae, text_encoder, stdit, scheduler = build_models(args.model_type, config)
141
  def run_inference(prompt_text):
142
  latent_size = get_latent_size(config, vae)
143
 
144
- from opensora.datasets import save_sample
145
  samples = scheduler.sample(
146
  stdit,
147
  text_encoder,
@@ -204,6 +226,5 @@ with gr.Blocks() as demo:
204
  )
205
 
206
  # launch
207
- # demo.launch(server_port=args.port, server_name=args.host, share=args.share)
208
- demo.launch()
209
 
 
14
  import spaces
15
  import gradio as gr
16
  import torch
17
+ from opensora.datasets import save_sample
18
 
19
 
20
 
 
30
  "v1-HQ-16x512x512": "hpcai-tech/OpenSora-STDiT-v1-HQ-16x512x512",
31
  }
32
 
33
+ def install_dependencies(enable_optimization=False):
34
  """
35
  Install the required dependencies for the demo if they are not already installed.
36
  """
 
42
  except (ImportError, ModuleNotFoundError):
43
  return False
44
 
45
+ # flash attention is needed no matter whether optimization is enabled or not
46
+ # because Hugging Face transformers detects that flash_attn is a dependency in STDiT
47
+ # thus, we need to install it no matter what
48
  if not _is_package_available("flash_attn"):
49
  subprocess.run(
50
  f"{sys.executable} -m pip install flash-attn --no-build-isolation",
 
52
  shell=True,
53
  )
54
 
55
+ if enable_optimization:
56
+ # install apex
57
+ if not _is_package_available("apex"):
58
+ subprocess.run(
59
+ f'{sys.executable} -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git',
60
+ shell=True,
61
+ )
62
+
63
+ # install ninja
64
+ if not _is_package_available("ninja"):
65
+ subprocess.run(f"{sys.executable} -m pip install ninja", shell=True)
66
+
67
+ # install xformers
68
+ if not _is_package_available("xformers"):
69
+ subprocess.run(
70
+ f"{sys.executable} -m pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers",
71
+ shell=True,
72
+ )
73
+
74
  def read_config(config_path):
75
  """
76
  Read the configuration file.
 
136
  parser.add_argument("--port", default=None, type=int, help="The port to run the Gradio App on.")
137
  parser.add_argument("--host", default=None, type=str, help="The host to run the Gradio App on.")
138
  parser.add_argument("--share", action="store_true", help="Whether to share this gradio demo.")
139
+ parser.add_argument("--enable-optimization", action="store_true", help="Whether to enable optimization such as flash attention and fused layernorm")
140
  return parser.parse_args()
141
 
142
 
 
151
  os.makedirs(args.output, exist_ok=True)
152
 
153
  # disable torch jit as it can cause failure in gradio SDK
154
+ # gradio sdk uses torch with cuda 11.3
155
  torch.jit._state.disable()
156
 
157
  # set up
158
+ install_dependencies(enable_optimization=args.enable_optimization)
159
 
160
  # build model
161
  vae, text_encoder, stdit, scheduler = build_models(args.model_type, config)
 
164
  def run_inference(prompt_text):
165
  latent_size = get_latent_size(config, vae)
166
 
 
167
  samples = scheduler.sample(
168
  stdit,
169
  text_encoder,
 
226
  )
227
 
228
  # launch
229
+ demo.launch(server_port=args.port, server_name=args.host, share=args.share)
 
230