Commit 532cb3d by hysts (HF staff)
Parent: 4c44f87

Migrate from yapf to black
Files changed (5):
  1. .pre-commit-config.yaml +54 -35
  2. .style.yapf +0 -5
  3. .vscode/settings.json +21 -0
  4. app.py +76 -97
  5. inference.py +12 -17
.pre-commit-config.yaml CHANGED
@@ -1,37 +1,56 @@
 exclude: patch
 repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.2.0
-    hooks:
-      - id: check-executables-have-shebangs
-      - id: check-json
-      - id: check-merge-conflict
-      - id: check-shebang-scripts-are-executable
-      - id: check-toml
-      - id: check-yaml
-      - id: double-quote-string-fixer
-      - id: end-of-file-fixer
-      - id: mixed-line-ending
-        args: ['--fix=lf']
-      - id: requirements-txt-fixer
-      - id: trailing-whitespace
-  - repo: https://github.com/myint/docformatter
-    rev: v1.4
-    hooks:
-      - id: docformatter
-        args: ['--in-place']
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.991
-    hooks:
-      - id: mypy
-        args: ['--ignore-missing-imports']
-        additional_dependencies: ['types-python-slugify']
-  - repo: https://github.com/google/yapf
-    rev: v0.32.0
-    hooks:
-      - id: yapf
-        args: ['--parallel', '--in-place']
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/myint/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        args: ["--in-place"]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.5.1
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          ["types-python-slugify", "types-requests", "types-PyYAML"]
+  - repo: https://github.com/psf/black
+    rev: 23.9.1
+    hooks:
+      - id: black
+        language_version: python3.10
+        args: ["--line-length", "119"]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]
.style.yapf DELETED
@@ -1,5 +0,0 @@
-[style]
-based_on_style = pep8
-blank_line_before_nested_class_or_def = false
-spaces_before_comment = 2
-split_before_logical_operator = true
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnType": true,
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        }
+    },
+    "black-formatter.args": [
+        "--line-length=119"
+    ],
+    "isort.args": ["--profile", "black"],
+    "flake8.args": [
+        "--max-line-length=119"
+    ],
+    "ruff.args": [
+        "--line-length=119"
+    ],
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": true
+}
app.py CHANGED
@@ -18,89 +18,64 @@ class InferenceUtil:
         try:
             card = InferencePipeline.get_model_card(model_id, self.hf_token)
         except Exception:
-            return '', ''
-        base_model = getattr(card.data, 'base_model', '')
-        training_prompt = getattr(card.data, 'training_prompt', '')
+            return "", ""
+        base_model = getattr(card.data, "base_model", "")
+        training_prompt = getattr(card.data, "training_prompt", "")
         return base_model, training_prompt
 
 
-DESCRIPTION = '# [Tune-A-Video](https://tuneavideo.github.io/)'
+DESCRIPTION = "# [Tune-A-Video](https://tuneavideo.github.io/)"
 if not torch.cuda.is_available():
-    DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv(
-    'CACHE_EXAMPLES') == '1'
+CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") == "1"
 
-HF_TOKEN = os.getenv('HF_TOKEN')
+HF_TOKEN = os.getenv("HF_TOKEN")
 pipe = InferencePipeline(HF_TOKEN)
 app = InferenceUtil(HF_TOKEN)
 
-with gr.Blocks(css='style.css') as demo:
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
 
     with gr.Row():
         with gr.Column():
             with gr.Box():
                 model_id = gr.Dropdown(
-                    label='Model ID',
+                    label="Model ID",
                     choices=[
-                        'Tune-A-Video-library/a-man-is-surfing',
-                        'Tune-A-Video-library/mo-di-bear-guitar',
-                        'Tune-A-Video-library/redshift-man-skiing',
+                        "Tune-A-Video-library/a-man-is-surfing",
+                        "Tune-A-Video-library/mo-di-bear-guitar",
+                        "Tune-A-Video-library/redshift-man-skiing",
                     ],
-                    value='Tune-A-Video-library/a-man-is-surfing')
-                with gr.Accordion(
-                        label=
-                        'Model info (Base model and prompt used for training)',
-                        open=False):
+                    value="Tune-A-Video-library/a-man-is-surfing",
+                )
+                with gr.Accordion(label="Model info (Base model and prompt used for training)", open=False):
                     with gr.Row():
-                        base_model_used_for_training = gr.Text(
-                            label='Base model', interactive=False)
-                        prompt_used_for_training = gr.Text(
-                            label='Training prompt', interactive=False)
-                prompt = gr.Textbox(label='Prompt',
-                                    max_lines=1,
-                                    placeholder='Example: "A panda is surfing"')
-                video_length = gr.Slider(label='Video length',
-                                         minimum=4,
-                                         maximum=12,
-                                         step=1,
-                                         value=8)
-                fps = gr.Slider(label='FPS',
-                                minimum=1,
-                                maximum=12,
-                                step=1,
-                                value=1)
-                seed = gr.Slider(label='Seed',
-                                 minimum=0,
-                                 maximum=100000,
-                                 step=1,
-                                 value=0)
-                with gr.Accordion('Other Parameters', open=False):
-                    num_steps = gr.Slider(label='Number of Steps',
-                                          minimum=0,
-                                          maximum=100,
-                                          step=1,
-                                          value=50)
-                    guidance_scale = gr.Slider(label='CFG Scale',
-                                               minimum=0,
-                                               maximum=50,
-                                               step=0.1,
-                                               value=7.5)
-
-                run_button = gr.Button('Generate')
-
-                gr.Markdown('''
+                        base_model_used_for_training = gr.Text(label="Base model", interactive=False)
+                        prompt_used_for_training = gr.Text(label="Training prompt", interactive=False)
+                prompt = gr.Textbox(label="Prompt", max_lines=1, placeholder='Example: "A panda is surfing"')
+                video_length = gr.Slider(label="Video length", minimum=4, maximum=12, step=1, value=8)
+                fps = gr.Slider(label="FPS", minimum=1, maximum=12, step=1, value=1)
+                seed = gr.Slider(label="Seed", minimum=0, maximum=100000, step=1, value=0)
+                with gr.Accordion("Other Parameters", open=False):
+                    num_steps = gr.Slider(label="Number of Steps", minimum=0, maximum=100, step=1, value=50)
+                    guidance_scale = gr.Slider(label="CFG Scale", minimum=0, maximum=50, step=0.1, value=7.5)
+
+                run_button = gr.Button("Generate")
+
+                gr.Markdown(
+                    """
     - It takes a few minutes to download model first.
     - Expected time to generate an 8-frame video: 70 seconds with T4, 24 seconds with A10G, (10 seconds with A100)
-    ''')
+    """
+                )
         with gr.Column():
-            result = gr.Video(label='Result')
+            result = gr.Video(label="Result")
     with gr.Row():
         examples = [
             [
-                'Tune-A-Video-library/a-man-is-surfing',
-                'A panda is surfing.',
+                "Tune-A-Video-library/a-man-is-surfing",
+                "A panda is surfing.",
                 8,
                 1,
                 3,
@@ -108,8 +83,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/a-man-is-surfing',
-                'A racoon is surfing, cartoon style.',
+                "Tune-A-Video-library/a-man-is-surfing",
+                "A racoon is surfing, cartoon style.",
                 8,
                 1,
                 3,
@@ -117,8 +92,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/mo-di-bear-guitar',
-                'a handsome prince is playing guitar, modern disney style.',
+                "Tune-A-Video-library/mo-di-bear-guitar",
+                "a handsome prince is playing guitar, modern disney style.",
                 8,
                 1,
                 123,
@@ -126,8 +101,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/mo-di-bear-guitar',
-                'a magical princess is playing guitar, modern disney style.',
+                "Tune-A-Video-library/mo-di-bear-guitar",
+                "a magical princess is playing guitar, modern disney style.",
                 8,
                 1,
                 123,
@@ -135,8 +110,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/mo-di-bear-guitar',
-                'a rabbit is playing guitar, modern disney style.',
+                "Tune-A-Video-library/mo-di-bear-guitar",
+                "a rabbit is playing guitar, modern disney style.",
                 8,
                 1,
                 123,
@@ -144,8 +119,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/mo-di-bear-guitar',
-                'a baby is playing guitar, modern disney style.',
+                "Tune-A-Video-library/mo-di-bear-guitar",
+                "a baby is playing guitar, modern disney style.",
                 8,
                 1,
                 123,
@@ -153,8 +128,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/redshift-man-skiing',
-                '(redshift style) spider man is skiing.',
+                "Tune-A-Video-library/redshift-man-skiing",
+                "(redshift style) spider man is skiing.",
                 8,
                 1,
                 123,
@@ -162,8 +137,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/redshift-man-skiing',
-                '(redshift style) black widow is skiing.',
+                "Tune-A-Video-library/redshift-man-skiing",
+                "(redshift style) black widow is skiing.",
                 8,
                 1,
                 123,
@@ -171,8 +146,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/redshift-man-skiing',
-                '(redshift style) batman is skiing.',
+                "Tune-A-Video-library/redshift-man-skiing",
+                "(redshift style) batman is skiing.",
                 8,
                 1,
                 123,
@@ -180,8 +155,8 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
             [
-                'Tune-A-Video-library/redshift-man-skiing',
-                '(redshift style) hulk is skiing.',
+                "Tune-A-Video-library/redshift-man-skiing",
+                "(redshift style) hulk is skiing.",
                 8,
                 1,
                 123,
@@ -189,26 +164,30 @@ with gr.Blocks(css='style.css') as demo:
                 7.5,
             ],
         ]
-        gr.Examples(examples=examples,
-                    inputs=[
-                        model_id,
-                        prompt,
-                        video_length,
-                        fps,
-                        seed,
-                        num_steps,
-                        guidance_scale,
-                    ],
-                    outputs=result,
-                    fn=pipe.run,
-                    cache_examples=CACHE_EXAMPLES)
-
-    model_id.change(fn=app.load_model_info,
-                    inputs=model_id,
-                    outputs=[
-                        base_model_used_for_training,
-                        prompt_used_for_training,
-                    ])
+        gr.Examples(
+            examples=examples,
+            inputs=[
+                model_id,
+                prompt,
+                video_length,
+                fps,
+                seed,
+                num_steps,
+                guidance_scale,
+            ],
+            outputs=result,
+            fn=pipe.run,
+            cache_examples=CACHE_EXAMPLES,
+        )
+
+    model_id.change(
+        fn=app.load_model_info,
+        inputs=model_id,
+        outputs=[
+            base_model_used_for_training,
+            prompt_used_for_training,
+        ],
+    )
     inputs = [
         model_id,
         prompt,
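
Most of the churn in app.py is mechanical black output: quotes are normalized to double, calls that fit within the 119-column limit are collapsed onto one line (the gr.Slider and gr.Text widgets), and calls whose argument list ends with a trailing comma stay exploded one argument per line, black's "magic trailing comma" (the rewritten gr.Dropdown, gr.Examples, and model_id.change calls). The two cases, sketched with lines from the diff above:

# No trailing comma, and the call fits in 119 columns: black collapses it.
fps = gr.Slider(label="FPS", minimum=1, maximum=12, step=1, value=1)

# Trailing comma after the last argument: black keeps the call exploded.
model_id.change(
    fn=app.load_model_info,
    inputs=model_id,
    outputs=[
        base_model_used_for_training,
        prompt_used_for_training,
    ],
)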
inference.py CHANGED
@@ -13,7 +13,7 @@ from diffusers.utils.import_utils import is_xformers_available
 from einops import rearrange
 from huggingface_hub import ModelCard
 
-sys.path.append('Tune-A-Video')
+sys.path.append("Tune-A-Video")
 
 from tuneavideo.models.unet import UNet3DConditionModel
 from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
@@ -23,8 +23,7 @@ class InferencePipeline:
     def __init__(self, hf_token: str | None = None):
         self.hf_token = hf_token
         self.pipe = None
-        self.device = torch.device(
-            'cuda:0' if torch.cuda.is_available() else 'cpu')
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         self.model_id = None
 
     def clear(self) -> None:
@@ -39,10 +38,9 @@ class InferencePipeline:
         return pathlib.Path(model_id).exists()
 
     @staticmethod
-    def get_model_card(model_id: str,
-                       hf_token: str | None = None) -> ModelCard:
+    def get_model_card(model_id: str, hf_token: str | None = None) -> ModelCard:
         if InferencePipeline.check_if_model_is_local(model_id):
-            card_path = (pathlib.Path(model_id) / 'README.md').as_posix()
+            card_path = (pathlib.Path(model_id) / "README.md").as_posix()
         else:
             card_path = model_id
         return ModelCard.load(card_path, token=hf_token)
@@ -57,14 +55,11 @@ class InferencePipeline:
             return
         base_model_id = self.get_base_model_info(model_id, self.hf_token)
         unet = UNet3DConditionModel.from_pretrained(
-            model_id,
-            subfolder='unet',
-            torch_dtype=torch.float16,
-            use_auth_token=self.hf_token)
-        pipe = TuneAVideoPipeline.from_pretrained(base_model_id,
-                                                  unet=unet,
-                                                  torch_dtype=torch.float16,
-                                                  use_auth_token=self.hf_token)
+            model_id, subfolder="unet", torch_dtype=torch.float16, use_auth_token=self.hf_token
+        )
+        pipe = TuneAVideoPipeline.from_pretrained(
+            base_model_id, unet=unet, torch_dtype=torch.float16, use_auth_token=self.hf_token
+        )
         pipe = pipe.to(self.device)
         if is_xformers_available():
             pipe.unet.enable_xformers_memory_efficient_attention()
@@ -82,7 +77,7 @@ class InferencePipeline:
         guidance_scale: float,
     ) -> PIL.Image.Image:
         if not torch.cuda.is_available():
-            raise gr.Error('CUDA is not available.')
+            raise gr.Error("CUDA is not available.")
 
         self.load_pipe(model_id)
 
@@ -97,10 +92,10 @@ class InferencePipeline:
             generator=generator,
         )  # type: ignore
 
-        frames = rearrange(out.videos[0], 'c t h w -> t h w c')
+        frames = rearrange(out.videos[0], "c t h w -> t h w c")
         frames = (frames * 255).to(torch.uint8).numpy()
 
-        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
         writer = imageio.get_writer(out_file.name, fps=fps)
         for frame in frames:
             writer.append_data(frame)
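
Where a call exceeds 119 columns and carries no trailing comma, black breaks after the opening parenthesis and indents the arguments one level, rather than aligning continuation lines under the parenthesis as yapf did; that is what produced the two from_pretrained rewrites above. A before/after sketch using the same API as the earlier example (the SRC name is illustrative; the call body is from this diff):

import black

SRC = """pipe = TuneAVideoPipeline.from_pretrained(base_model_id,
                                           unet=unet,
                                           torch_dtype=torch.float16,
                                           use_auth_token=self.hf_token)
"""

# Collapsed onto one line this call would be 124 columns, over the limit,
# so black splits at the parentheses instead:
print(black.format_str(SRC, mode=black.Mode(line_length=119)))
# pipe = TuneAVideoPipeline.from_pretrained(
#     base_model_id, unet=unet, torch_dtype=torch.float16, use_auth_token=self.hf_token
# )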