hysts (HF staff) committed
Commit ecfdc8b · 1 parent: 1f333c3

Migrate from yapf to black

Files changed (13)
  1. .pre-commit-config.yaml +26 -12
  2. .style.yapf +0 -5
  3. .vscode/settings.json +21 -0
  4. app.py +27 -29
  5. app_inference.py +64 -94
  6. app_system_monitor.py +29 -30
  7. app_training.py +77 -102
  8. app_upload.py +34 -34
  9. constants.py +7 -5
  10. inference.py +12 -17
  11. trainer.py +40 -41
  12. uploader.py +23 -20
  13. utils.py +12 -15
.pre-commit-config.yaml CHANGED
@@ -1,7 +1,7 @@
 exclude: patch
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.2.0
+  rev: v4.4.0
   hooks:
   - id: check-executables-have-shebangs
   - id: check-json
@@ -9,29 +9,43 @@ repos:
   - id: check-shebang-scripts-are-executable
   - id: check-toml
   - id: check-yaml
-  - id: double-quote-string-fixer
   - id: end-of-file-fixer
   - id: mixed-line-ending
-    args: ['--fix=lf']
+    args: ["--fix=lf"]
   - id: requirements-txt-fixer
   - id: trailing-whitespace
 - repo: https://github.com/myint/docformatter
-  rev: v1.4
+  rev: v1.7.5
   hooks:
   - id: docformatter
-    args: ['--in-place']
+    args: ["--in-place"]
 - repo: https://github.com/pycqa/isort
   rev: 5.12.0
   hooks:
   - id: isort
+    args: ["--profile", "black"]
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.991
+  rev: v1.5.1
   hooks:
   - id: mypy
-    args: ['--ignore-missing-imports']
-    additional_dependencies: ['types-python-slugify']
-- repo: https://github.com/google/yapf
-  rev: v0.32.0
+    args: ["--ignore-missing-imports"]
+    additional_dependencies: ["types-python-slugify", "types-requests", "types-PyYAML"]
+- repo: https://github.com/psf/black
+  rev: 23.9.0
   hooks:
-  - id: yapf
-    args: ['--parallel', '--in-place']
+  - id: black
+    language_version: python3.10
+    args: ["--line-length", "119"]
+- repo: https://github.com/kynan/nbstripout
+  rev: 0.6.1
+  hooks:
+  - id: nbstripout
+    args: ["--extra-keys", "metadata.interpreter metadata.kernelspec cell.metadata.pycharm"]
+- repo: https://github.com/nbQA-dev/nbQA
+  rev: 1.7.0
+  hooks:
+  - id: nbqa-black
+  - id: nbqa-pyupgrade
+    args: ["--py37-plus"]
+  - id: nbqa-isort
+    args: ["--float-to-top"]
.style.yapf DELETED
@@ -1,5 +0,0 @@
-[style]
-based_on_style = pep8
-blank_line_before_nested_class_or_def = false
-spaces_before_comment = 2
-split_before_logical_operator = true
.vscode/settings.json ADDED
@@ -0,0 +1,21 @@
+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnType": true,
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        }
+    },
+    "black-formatter.args": [
+        "--line-length=119"
+    ],
+    "isort.args": ["--profile", "black"],
+    "flake8.args": [
+        "--max-line-length=119"
+    ],
+    "ruff.args": [
+        "--line-length=119"
+    ],
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": true
+}
app.py CHANGED
@@ -15,37 +15,37 @@ from app_upload import create_upload_demo
 from inference import InferencePipeline
 from trainer import Trainer
 
-TITLE = '# [Tune-A-Video](https://tuneavideo.github.io/)'
+TITLE = "# [Tune-A-Video](https://tuneavideo.github.io/)"
 
-ORIGINAL_SPACE_ID = 'Tune-A-Video-library/Tune-A-Video-Training-UI'
-SPACE_ID = os.getenv('SPACE_ID')
-GPU_DATA = getoutput('nvidia-smi')
-SHARED_UI_WARNING = f'''## Attention - Training doesn't work in this shared UI. You can duplicate and use it with a paid private T4 GPU.
+ORIGINAL_SPACE_ID = "Tune-A-Video-library/Tune-A-Video-Training-UI"
+SPACE_ID = os.getenv("SPACE_ID")
+GPU_DATA = getoutput("nvidia-smi")
+SHARED_UI_WARNING = f"""## Attention - Training doesn't work in this shared UI. You can duplicate and use it with a paid private T4 GPU.
 
 <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="margin-top:0;margin-bottom:0" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
-'''
+"""
 
 IS_SHARED_UI = SPACE_ID == ORIGINAL_SPACE_ID
-if os.getenv('SYSTEM') == 'spaces' and SPACE_ID != ORIGINAL_SPACE_ID:
+if os.getenv("SYSTEM") == "spaces" and SPACE_ID != ORIGINAL_SPACE_ID:
     SETTINGS = f'<a href="https://huggingface.co/spaces/{SPACE_ID}/settings">Settings</a>'
 else:
-    SETTINGS = 'Settings'
+    SETTINGS = "Settings"
 
-INVALID_GPU_WARNING = f'''## Attention - the specified GPU is invalid. Training may not work. Make sure you have selected a `T4 GPU` for this task.'''
+INVALID_GPU_WARNING = f"""## Attention - the specified GPU is invalid. Training may not work. Make sure you have selected a `T4 GPU` for this task."""
 
-CUDA_NOT_AVAILABLE_WARNING = f'''## Attention - Running on CPU.
+CUDA_NOT_AVAILABLE_WARNING = f"""## Attention - Running on CPU.
 <center>
 You can assign a GPU in the {SETTINGS} tab if you are running this on HF Spaces.
 You can use "T4 small/medium" to run this demo.
 </center>
-'''
+"""
 
-HF_TOKEN_NOT_SPECIFIED_WARNING = f'''The environment variable `HF_TOKEN` is not specified. Feel free to specify your Hugging Face token with write permission if you don't want to manually provide it for every run.
+HF_TOKEN_NOT_SPECIFIED_WARNING = f"""The environment variable `HF_TOKEN` is not specified. Feel free to specify your Hugging Face token with write permission if you don't want to manually provide it for every run.
 
 You can check and create your Hugging Face tokens <a href="https://huggingface.co/settings/tokens" target="_blank">here</a>. You can specify environment variables in the "Repository secrets" section of the {SETTINGS} tab.
-'''
+"""
 
-HF_TOKEN = os.getenv('HF_TOKEN')
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 def show_warning(warning_text: str) -> gr.Blocks:
@@ -58,33 +58,31 @@ def show_warning(warning_text: str) -> gr.Blocks:
 pipe = InferencePipeline(HF_TOKEN)
 trainer = Trainer()
 
-with gr.Blocks(css='style.css') as demo:
+with gr.Blocks(css="style.css") as demo:
     if IS_SHARED_UI:
         show_warning(SHARED_UI_WARNING)
     elif not torch.cuda.is_available():
        show_warning(CUDA_NOT_AVAILABLE_WARNING)
-    elif 'T4' not in GPU_DATA:
+    elif "T4" not in GPU_DATA:
        show_warning(INVALID_GPU_WARNING)
 
     gr.Markdown(TITLE)
     with gr.Tabs():
-        with gr.TabItem('Train'):
-            create_training_demo(trainer,
-                                 pipe,
-                                 disable_run_button=IS_SHARED_UI)
-        with gr.TabItem('Run'):
-            create_inference_demo(pipe,
-                                  HF_TOKEN,
-                                  disable_run_button=IS_SHARED_UI)
-        with gr.TabItem('Upload'):
-            gr.Markdown('''
+        with gr.TabItem("Train"):
+            create_training_demo(trainer, pipe, disable_run_button=IS_SHARED_UI)
+        with gr.TabItem("Run"):
+            create_inference_demo(pipe, HF_TOKEN, disable_run_button=IS_SHARED_UI)
+        with gr.TabItem("Upload"):
+            gr.Markdown(
+                """
             - You can use this tab to upload models later if you choose not to upload models in training time or if upload in training time failed.
-            ''')
+            """
+            )
             create_upload_demo(disable_run_button=IS_SHARED_UI)
 
     with gr.Row():
-        if not IS_SHARED_UI and not os.getenv('DISABLE_SYSTEM_MONITOR'):
-            with gr.Accordion(label='System info', open=False):
+        if not IS_SHARED_UI and not os.getenv("DISABLE_SYSTEM_MONITOR"):
+            with gr.Accordion(label="System info", open=False):
                 create_monitor_demo()
 
     if not HF_TOKEN:
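The app.py hunks are pure reformatting plus the quote change; the warning-banner logic is untouched. For reference, the GPU-detection pattern the file relies on can be exercised standalone (a sketch, assuming `nvidia-smi` may simply be absent on the host):

```python
# Sketch of the check app.py performs before deciding which warning to show.
from subprocess import getoutput

import torch

GPU_DATA = getoutput("nvidia-smi")  # returns an error string on machines without the NVIDIA driver

if not torch.cuda.is_available():
    print("warning: running on CPU")
elif "T4" not in GPU_DATA:
    print("warning: the attached GPU is not a T4")
else:
    print("T4 detected")
```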
app_inference.py CHANGED
@@ -14,7 +14,7 @@ from utils import find_exp_dirs
 
 class ModelSource(enum.Enum):
     HUB_LIB = UploadTarget.MODEL_LIBRARY.value
-    LOCAL = 'Local'
+    LOCAL = "Local"
 
 
 class InferenceUtil:
@@ -23,18 +23,13 @@ class InferenceUtil:
 
     def load_hub_model_list(self) -> dict:
         api = HfApi(token=self.hf_token)
-        choices = [
-            info.modelId
-            for info in api.list_models(author=MODEL_LIBRARY_ORG_NAME)
-        ]
-        return gr.update(choices=choices,
-                         value=choices[0] if choices else None)
+        choices = [info.modelId for info in api.list_models(author=MODEL_LIBRARY_ORG_NAME)]
+        return gr.update(choices=choices, value=choices[0] if choices else None)
 
     @staticmethod
     def load_local_model_list() -> dict:
         choices = find_exp_dirs()
-        return gr.update(choices=choices,
-                         value=choices[0] if choices else None)
+        return gr.update(choices=choices, value=choices[0] if choices else None)
 
     def reload_model_list(self, model_source: str) -> dict:
         if model_source == ModelSource.HUB_LIB.value:
@@ -48,22 +43,21 @@ class InferenceUtil:
         try:
             card = InferencePipeline.get_model_card(model_id, self.hf_token)
         except Exception:
-            return '', ''
-        base_model = getattr(card.data, 'base_model', '')
-        training_prompt = getattr(card.data, 'training_prompt', '')
+            return "", ""
+        base_model = getattr(card.data, "base_model", "")
+        training_prompt = getattr(card.data, "training_prompt", "")
         return base_model, training_prompt
 
-    def reload_model_list_and_update_model_info(
-            self, model_source: str) -> tuple[dict, str, str]:
+    def reload_model_list_and_update_model_info(self, model_source: str) -> tuple[dict, str, str]:
         model_list_update = self.reload_model_list(model_source)
-        model_list = model_list_update['choices']
-        model_info = self.load_model_info(model_list[0] if model_list else '')
+        model_list = model_list_update["choices"]
+        model_info = self.load_model_info(model_list[0] if model_list else "")
         return model_list_update, *model_info
 
 
-def create_inference_demo(pipe: InferencePipeline,
-                          hf_token: str | None = None,
-                          disable_run_button: bool = False) -> gr.Blocks:
+def create_inference_demo(
+    pipe: InferencePipeline, hf_token: str | None = None, disable_run_button: bool = False
+) -> gr.Blocks:
     app = InferenceUtil(hf_token)
 
     with gr.Blocks() as demo:
@@ -71,84 +65,60 @@ def create_inference_demo(pipe: InferencePipeline,
             with gr.Column():
                 with gr.Box():
                     model_source = gr.Radio(
-                        label='Model Source',
-                        choices=[_.value for _ in ModelSource],
-                        value=ModelSource.HUB_LIB.value)
-                    reload_button = gr.Button('Reload Model List')
-                    model_id = gr.Dropdown(label='Model ID',
-                                           choices=None,
-                                           value=None)
-                    with gr.Accordion(
-                            label=
-                            'Model info (Base model and prompt used for training)',
-                            open=False):
+                        label="Model Source", choices=[_.value for _ in ModelSource], value=ModelSource.HUB_LIB.value
+                    )
+                    reload_button = gr.Button("Reload Model List")
+                    model_id = gr.Dropdown(label="Model ID", choices=None, value=None)
+                    with gr.Accordion(label="Model info (Base model and prompt used for training)", open=False):
                         with gr.Row():
-                            base_model_used_for_training = gr.Text(
-                                label='Base model', interactive=False)
-                            prompt_used_for_training = gr.Text(
-                                label='Training prompt', interactive=False)
-                    prompt = gr.Textbox(
-                        label='Prompt',
-                        max_lines=1,
-                        placeholder='Example: "A panda is surfing"')
-                    video_length = gr.Slider(label='Video length',
-                                             minimum=4,
-                                             maximum=12,
-                                             step=1,
-                                             value=8)
-                    fps = gr.Slider(label='FPS',
-                                    minimum=1,
-                                    maximum=12,
-                                    step=1,
-                                    value=1)
-                    seed = gr.Slider(label='Seed',
-                                     minimum=0,
-                                     maximum=100000,
-                                     step=1,
-                                     value=0)
-                    with gr.Accordion('Advanced options', open=False):
-                        num_steps = gr.Slider(label='Number of Steps',
-                                              minimum=0,
-                                              maximum=100,
-                                              step=1,
-                                              value=50)
-                        guidance_scale = gr.Slider(label='Guidance scale',
-                                                   minimum=0,
-                                                   maximum=50,
-                                                   step=0.1,
-                                                   value=7.5)
-
-                run_button = gr.Button('Generate',
-                                       interactive=not disable_run_button)
-
-                gr.Markdown('''
+                            base_model_used_for_training = gr.Text(label="Base model", interactive=False)
+                            prompt_used_for_training = gr.Text(label="Training prompt", interactive=False)
+                    prompt = gr.Textbox(label="Prompt", max_lines=1, placeholder='Example: "A panda is surfing"')
+                    video_length = gr.Slider(label="Video length", minimum=4, maximum=12, step=1, value=8)
+                    fps = gr.Slider(label="FPS", minimum=1, maximum=12, step=1, value=1)
+                    seed = gr.Slider(label="Seed", minimum=0, maximum=100000, step=1, value=0)
+                    with gr.Accordion("Advanced options", open=False):
+                        num_steps = gr.Slider(label="Number of Steps", minimum=0, maximum=100, step=1, value=50)
+                        guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=50, step=0.1, value=7.5)
+
+                run_button = gr.Button("Generate", interactive=not disable_run_button)
+
+                gr.Markdown(
+                    """
                 - After training, you can press "Reload Model List" button to load your trained model names.
                 - It takes a few minutes to download model first.
                 - Expected time to generate an 8-frame video: 70 seconds with T4, 24 seconds with A10G, (10 seconds with A100)
-                ''')
+                """
+                )
             with gr.Column():
-                result = gr.Video(label='Result')
-
-        model_source.change(fn=app.reload_model_list_and_update_model_info,
-                            inputs=model_source,
-                            outputs=[
-                                model_id,
-                                base_model_used_for_training,
-                                prompt_used_for_training,
-                            ])
-        reload_button.click(fn=app.reload_model_list_and_update_model_info,
-                            inputs=model_source,
-                            outputs=[
-                                model_id,
-                                base_model_used_for_training,
-                                prompt_used_for_training,
-                            ])
-        model_id.change(fn=app.load_model_info,
-                        inputs=model_id,
-                        outputs=[
-                            base_model_used_for_training,
-                            prompt_used_for_training,
-                        ])
+                result = gr.Video(label="Result")
+
+        model_source.change(
+            fn=app.reload_model_list_and_update_model_info,
+            inputs=model_source,
+            outputs=[
+                model_id,
+                base_model_used_for_training,
+                prompt_used_for_training,
+            ],
+        )
+        reload_button.click(
+            fn=app.reload_model_list_and_update_model_info,
+            inputs=model_source,
+            outputs=[
+                model_id,
+                base_model_used_for_training,
+                prompt_used_for_training,
+            ],
+        )
+        model_id.change(
+            fn=app.load_model_info,
+            inputs=model_id,
+            outputs=[
+                base_model_used_for_training,
+                prompt_used_for_training,
+            ],
+        )
         inputs = [
             model_id,
             prompt,
@@ -163,10 +133,10 @@ def create_inference_demo(pipe: InferencePipeline,
     return demo
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import os
 
-    hf_token = os.getenv('HF_TOKEN')
+    hf_token = os.getenv("HF_TOKEN")
     pipe = InferencePipeline(hf_token)
     demo = create_inference_demo(pipe, hf_token)
     demo.queue(api_open=False, max_size=10).launch()
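Most of this file is re-wrapped call sites, but the `gr.update(...)` idiom black compacts onto single lines is worth noting: the event handler returns an update dict rather than a new component. A minimal sketch (hypothetical model names, assuming the gradio 3.x API this Space uses):

```python
import gradio as gr


def reload_choices() -> dict:
    # Stand-in for api.list_models(...) / find_exp_dirs() in app_inference.py.
    choices = ["model-a", "model-b"]
    return gr.update(choices=choices, value=choices[0] if choices else None)


with gr.Blocks() as demo:
    model_id = gr.Dropdown(label="Model ID", choices=None, value=None)
    gr.Button("Reload").click(fn=reload_choices, inputs=None, outputs=model_id)
```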
app_system_monitor.py CHANGED
@@ -16,15 +16,12 @@ class SystemMonitor:
 
     def __init__(self):
         self.devices = nvitop.Device.all()
-        self.cpu_memory_usage = collections.deque(
-            [0 for _ in range(self.MAX_SIZE)], maxlen=self.MAX_SIZE)
-        self.cpu_memory_usage_str = ''
-        self.gpu_memory_usage = collections.deque(
-            [0 for _ in range(self.MAX_SIZE)], maxlen=self.MAX_SIZE)
-        self.gpu_util = collections.deque([0 for _ in range(self.MAX_SIZE)],
-                                          maxlen=self.MAX_SIZE)
-        self.gpu_memory_usage_str = ''
-        self.gpu_util_str = ''
+        self.cpu_memory_usage = collections.deque([0 for _ in range(self.MAX_SIZE)], maxlen=self.MAX_SIZE)
+        self.cpu_memory_usage_str = ""
+        self.gpu_memory_usage = collections.deque([0 for _ in range(self.MAX_SIZE)], maxlen=self.MAX_SIZE)
+        self.gpu_util = collections.deque([0 for _ in range(self.MAX_SIZE)], maxlen=self.MAX_SIZE)
+        self.gpu_memory_usage_str = ""
+        self.gpu_util_str = ""
 
     def update(self) -> None:
         self.update_cpu()
@@ -33,7 +30,9 @@ class SystemMonitor:
     def update_cpu(self) -> None:
         memory = psutil.virtual_memory()
         self.cpu_memory_usage.append(memory.percent)
-        self.cpu_memory_usage_str = f'{memory.used / 1024**3:0.2f}GiB / {memory.total / 1024**3:0.2f}GiB ({memory.percent}%)'
+        self.cpu_memory_usage_str = (
+            f"{memory.used / 1024**3:0.2f}GiB / {memory.total / 1024**3:0.2f}GiB ({memory.percent}%)"
+        )
 
     def update_gpu(self) -> None:
         if not self.devices:
@@ -41,36 +40,36 @@ class SystemMonitor:
         device = self.devices[0]
         self.gpu_memory_usage.append(device.memory_percent())
         self.gpu_util.append(device.gpu_utilization())
-        self.gpu_memory_usage_str = f'{device.memory_usage()} ({device.memory_percent()}%)'
-        self.gpu_util_str = f'{device.gpu_utilization()}%'
+        self.gpu_memory_usage_str = f"{device.memory_usage()} ({device.memory_percent()}%)"
+        self.gpu_util_str = f"{device.gpu_utilization()}%"
 
     def get_json(self) -> dict[str, str]:
         return {
-            'CPU memory usage': self.cpu_memory_usage_str,
-            'GPU memory usage': self.gpu_memory_usage_str,
-            'GPU Util': self.gpu_util_str,
+            "CPU memory usage": self.cpu_memory_usage_str,
+            "GPU memory usage": self.gpu_memory_usage_str,
+            "GPU Util": self.gpu_util_str,
         }
 
     def get_graph_data(self) -> dict[str, list[int | float]]:
         return {
-            'index': list(range(-self.MAX_SIZE + 1, 1)),
-            'CPU memory usage': self.cpu_memory_usage,
-            'GPU memory usage': self.gpu_memory_usage,
-            'GPU Util': self.gpu_util,
+            "index": list(range(-self.MAX_SIZE + 1, 1)),
+            "CPU memory usage": self.cpu_memory_usage,
+            "GPU memory usage": self.gpu_memory_usage,
+            "GPU Util": self.gpu_util,
         }
 
     def get_graph(self):
         df = pd.DataFrame(self.get_graph_data())
-        return px.line(df,
-                       x='index',
-                       y=[
-                           'CPU memory usage',
-                           'GPU memory usage',
-                           'GPU Util',
-                       ],
-                       range_y=[-5,
-                                105]).update_layout(xaxis_title='Time',
-                                                    yaxis_title='Percentage')
+        return px.line(
+            df,
+            x="index",
+            y=[
+                "CPU memory usage",
+                "GPU memory usage",
+                "GPU Util",
+            ],
+            range_y=[-5, 105],
+        ).update_layout(xaxis_title="Time", yaxis_title="Percentage")
 
 
 def create_monitor_demo() -> gr.Blocks:
@@ -82,6 +81,6 @@ def create_monitor_demo() -> gr.Blocks:
     return demo
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo = create_monitor_demo()
     demo.queue(api_open=False).launch()
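The deque initializations that black joins onto single lines implement a fixed-width rolling window: `maxlen` evicts the oldest sample on every `append`, so the plotted history always spans exactly `MAX_SIZE` points. A self-contained sketch of that behavior:

```python
import collections

import psutil

MAX_SIZE = 100
cpu_memory_usage: collections.deque = collections.deque([0] * MAX_SIZE, maxlen=MAX_SIZE)

for _ in range(3):
    cpu_memory_usage.append(psutil.virtual_memory().percent)

print(len(cpu_memory_usage))  # still MAX_SIZE; the oldest zeros were evicted
```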
app_training.py CHANGED
@@ -11,145 +11,120 @@ from inference import InferencePipeline
 from trainer import Trainer
 
 
-def create_training_demo(trainer: Trainer,
-                         pipe: InferencePipeline | None = None,
-                         disable_run_button: bool = False) -> gr.Blocks:
+def create_training_demo(
+    trainer: Trainer, pipe: InferencePipeline | None = None, disable_run_button: bool = False
+) -> gr.Blocks:
     def read_log() -> str:
         with open(trainer.log_file) as f:
             lines = f.readlines()
-        return ''.join(lines[-10:])
+        return "".join(lines[-10:])
 
     with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 with gr.Box():
-                    gr.Markdown('Training Data')
-                    training_video = gr.File(label='Training video')
-                    training_prompt = gr.Textbox(
-                        label='Training prompt',
-                        max_lines=1,
-                        placeholder='A man is surfing')
-                    gr.Markdown('''
+                    gr.Markdown("Training Data")
+                    training_video = gr.File(label="Training video")
+                    training_prompt = gr.Textbox(label="Training prompt", max_lines=1, placeholder="A man is surfing")
+                    gr.Markdown(
+                        """
                     - Upload a video and write a `Training Prompt` that describes the video.
-                    ''')
+                    """
+                    )
 
             with gr.Column():
                 with gr.Box():
-                    gr.Markdown('Training Parameters')
+                    gr.Markdown("Training Parameters")
                     with gr.Row():
-                        base_model = gr.Text(
-                            label='Base Model',
-                            value='CompVis/stable-diffusion-v1-4',
-                            max_lines=1)
-                        resolution = gr.Dropdown(choices=['512', '768'],
-                                                 value='512',
-                                                 label='Resolution',
-                                                 visible=False)
+                        base_model = gr.Text(label="Base Model", value="CompVis/stable-diffusion-v1-4", max_lines=1)
+                        resolution = gr.Dropdown(
+                            choices=["512", "768"], value="512", label="Resolution", visible=False
+                        )
 
-                    hf_token = gr.Text(label='Hugging Face Write Token',
-                                       type='password',
-                                       visible=os.getenv('HF_TOKEN') is None)
-                    with gr.Accordion(label='Advanced options', open=False):
-                        num_training_steps = gr.Number(
-                            label='Number of Training Steps',
-                            value=300,
-                            precision=0)
-                        learning_rate = gr.Number(label='Learning Rate',
-                                                  value=0.000035)
+                    hf_token = gr.Text(
+                        label="Hugging Face Write Token", type="password", visible=os.getenv("HF_TOKEN") is None
+                    )
+                    with gr.Accordion(label="Advanced options", open=False):
+                        num_training_steps = gr.Number(label="Number of Training Steps", value=300, precision=0)
+                        learning_rate = gr.Number(label="Learning Rate", value=0.000035)
                         gradient_accumulation = gr.Number(
-                            label='Number of Gradient Accumulation',
-                            value=1,
-                            precision=0)
-                        seed = gr.Slider(label='Seed',
-                                         minimum=0,
-                                         maximum=100000,
-                                         step=1,
-                                         randomize=True,
-                                         value=0)
-                        fp16 = gr.Checkbox(label='FP16', value=True)
-                        use_8bit_adam = gr.Checkbox(label='Use 8bit Adam',
-                                                    value=False)
-                        checkpointing_steps = gr.Number(
-                            label='Checkpointing Steps',
-                            value=1000,
-                            precision=0)
-                        validation_epochs = gr.Number(
-                            label='Validation Epochs', value=100, precision=0)
-                    gr.Markdown('''
+                            label="Number of Gradient Accumulation", value=1, precision=0
+                        )
+                        seed = gr.Slider(label="Seed", minimum=0, maximum=100000, step=1, randomize=True, value=0)
+                        fp16 = gr.Checkbox(label="FP16", value=True)
+                        use_8bit_adam = gr.Checkbox(label="Use 8bit Adam", value=False)
+                        checkpointing_steps = gr.Number(label="Checkpointing Steps", value=1000, precision=0)
+                        validation_epochs = gr.Number(label="Validation Epochs", value=100, precision=0)
+                    gr.Markdown(
+                        """
                     - The base model must be a Stable Diffusion model compatible with [diffusers](https://github.com/huggingface/diffusers) library.
                     - Expected time to train a model for 300 steps: ~20 minutes with T4
                     - You can check the training status by pressing the "Open logs" button if you are running this on your Space.
-                    ''')
+                    """
+                    )
 
         with gr.Row():
             with gr.Column():
-                gr.Markdown('Output Model')
-                output_model_name = gr.Text(label='Name of your model',
-                                            placeholder='The surfer man',
-                                            max_lines=1)
+                gr.Markdown("Output Model")
+                output_model_name = gr.Text(label="Name of your model", placeholder="The surfer man", max_lines=1)
                 validation_prompt = gr.Text(
-                    label='Validation Prompt',
-                    placeholder=
-                    'prompt to test the model, e.g: a dog is surfing')
+                    label="Validation Prompt", placeholder="prompt to test the model, e.g: a dog is surfing"
+                )
             with gr.Column():
-                gr.Markdown('Upload Settings')
+                gr.Markdown("Upload Settings")
                 with gr.Row():
-                    upload_to_hub = gr.Checkbox(label='Upload model to Hub',
-                                                value=True)
-                    use_private_repo = gr.Checkbox(label='Private', value=True)
-                    delete_existing_repo = gr.Checkbox(
-                        label='Delete existing repo of the same name',
-                        value=False)
+                    upload_to_hub = gr.Checkbox(label="Upload model to Hub", value=True)
+                    use_private_repo = gr.Checkbox(label="Private", value=True)
+                    delete_existing_repo = gr.Checkbox(label="Delete existing repo of the same name", value=False)
                     upload_to = gr.Radio(
-                        label='Upload to',
+                        label="Upload to",
                         choices=[_.value for _ in UploadTarget],
-                        value=UploadTarget.MODEL_LIBRARY.value)
+                        value=UploadTarget.MODEL_LIBRARY.value,
+                    )
 
         pause_space_after_training = gr.Checkbox(
-            label='Pause this Space after training',
+            label="Pause this Space after training",
             value=False,
-            interactive=bool(os.getenv('SPACE_ID')),
-            visible=False)
-        run_button = gr.Button('Start Training',
-                               interactive=not disable_run_button)
+            interactive=bool(os.getenv("SPACE_ID")),
+            visible=False,
+        )
+        run_button = gr.Button("Start Training", interactive=not disable_run_button)
 
         with gr.Box():
-            gr.Text(label='Log',
-                    value=read_log,
-                    lines=10,
-                    max_lines=10,
-                    every=1)
+            gr.Text(label="Log", value=read_log, lines=10, max_lines=10, every=1)
 
         if pipe is not None:
             run_button.click(fn=pipe.clear)
-        run_button.click(fn=trainer.run,
-                         inputs=[
-                             training_video,
-                             training_prompt,
-                             output_model_name,
-                             delete_existing_repo,
-                             validation_prompt,
-                             base_model,
-                             resolution,
-                             num_training_steps,
-                             learning_rate,
-                             gradient_accumulation,
-                             seed,
-                             fp16,
-                             use_8bit_adam,
-                             checkpointing_steps,
-                             validation_epochs,
-                             upload_to_hub,
-                             use_private_repo,
-                             delete_existing_repo,
-                             upload_to,
-                             pause_space_after_training,
-                             hf_token,
-                         ])
+        run_button.click(
+            fn=trainer.run,
+            inputs=[
+                training_video,
+                training_prompt,
+                output_model_name,
+                delete_existing_repo,
+                validation_prompt,
+                base_model,
+                resolution,
+                num_training_steps,
+                learning_rate,
+                gradient_accumulation,
+                seed,
+                fp16,
+                use_8bit_adam,
+                checkpointing_steps,
+                validation_epochs,
+                upload_to_hub,
+                use_private_repo,
+                delete_existing_repo,
+                upload_to,
+                pause_space_after_training,
+                hf_token,
+            ],
+        )
     return demo
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     trainer = Trainer()
     demo = create_training_demo(trainer)
     demo.queue(api_open=False, max_size=1).launch()
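One detail preserved through the reformat: `gr.Text(value=read_log, every=1)` re-invokes the callable every second, so returning only the last ten lines gives a cheap live log view. The helper in isolation (a sketch with a throwaway log file):

```python
import pathlib

log_file = pathlib.Path("log.txt")
log_file.write_text("line 1\nline 2\nline 3\n")


def read_log() -> str:
    with open(log_file) as f:
        lines = f.readlines()
    return "".join(lines[-10:])  # tail the file so the textbox stays bounded


print(read_log())
```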
app_upload.py CHANGED
@@ -21,49 +21,49 @@ def create_upload_demo(disable_run_button: bool = False) -> gr.Blocks:
 
     with gr.Blocks() as demo:
         with gr.Box():
-            gr.Markdown('Local Models')
-            reload_button = gr.Button('Reload Model List')
+            gr.Markdown("Local Models")
+            reload_button = gr.Button("Reload Model List")
             model_dir = gr.Dropdown(
-                label='Model names',
-                choices=model_dirs,
-                value=model_dirs[0] if model_dirs else None)
+                label="Model names", choices=model_dirs, value=model_dirs[0] if model_dirs else None
+            )
         with gr.Box():
-            gr.Markdown('Upload Settings')
+            gr.Markdown("Upload Settings")
             with gr.Row():
-                use_private_repo = gr.Checkbox(label='Private', value=True)
-                delete_existing_repo = gr.Checkbox(
-                    label='Delete existing repo of the same name', value=False)
-            upload_to = gr.Radio(label='Upload to',
-                                 choices=[_.value for _ in UploadTarget],
-                                 value=UploadTarget.MODEL_LIBRARY.value)
-            model_name = gr.Textbox(label='Model Name')
-            hf_token = gr.Text(label='Hugging Face Write Token',
-                               type='password',
-                               visible=os.getenv('HF_TOKEN') is None)
-        upload_button = gr.Button('Upload', interactive=not disable_run_button)
-        gr.Markdown(f'''
+                use_private_repo = gr.Checkbox(label="Private", value=True)
+                delete_existing_repo = gr.Checkbox(label="Delete existing repo of the same name", value=False)
+            upload_to = gr.Radio(
+                label="Upload to", choices=[_.value for _ in UploadTarget], value=UploadTarget.MODEL_LIBRARY.value
+            )
+            model_name = gr.Textbox(label="Model Name")
+            hf_token = gr.Text(
+                label="Hugging Face Write Token", type="password", visible=os.getenv("HF_TOKEN") is None
+            )
+        upload_button = gr.Button("Upload", interactive=not disable_run_button)
+        gr.Markdown(
+            f"""
         - You can upload your trained model to your personal profile (i.e. `https://huggingface.co/{{your_username}}/{{model_name}}`) or to the public [Tune-A-Video Library](https://huggingface.co/{MODEL_LIBRARY_ORG_NAME}) (i.e. `https://huggingface.co/{MODEL_LIBRARY_ORG_NAME}/{{model_name}}`).
-        ''')
+        """
+        )
         with gr.Box():
-            gr.Markdown('Output message')
+            gr.Markdown("Output message")
             output_message = gr.Markdown()
 
-        reload_button.click(fn=load_local_model_list,
-                            inputs=None,
-                            outputs=model_dir)
-        upload_button.click(fn=upload,
-                            inputs=[
-                                model_dir,
-                                model_name,
-                                upload_to,
-                                use_private_repo,
-                                delete_existing_repo,
-                                hf_token,
-                            ],
-                            outputs=output_message)
+        reload_button.click(fn=load_local_model_list, inputs=None, outputs=model_dir)
+        upload_button.click(
+            fn=upload,
+            inputs=[
+                model_dir,
+                model_name,
+                upload_to,
+                use_private_repo,
+                delete_existing_repo,
+                hf_token,
+            ],
+            outputs=output_message,
+        )
     return demo
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo = create_upload_demo()
     demo.queue(api_open=False, max_size=1).launch()
constants.py CHANGED
@@ -2,10 +2,12 @@ import enum
 
 
 class UploadTarget(enum.Enum):
-    PERSONAL_PROFILE = 'Personal Profile'
-    MODEL_LIBRARY = 'Tune-A-Video Library'
+    PERSONAL_PROFILE = "Personal Profile"
+    MODEL_LIBRARY = "Tune-A-Video Library"
 
 
-MODEL_LIBRARY_ORG_NAME = 'Tune-A-Video-library'
-SAMPLE_MODEL_REPO = 'Tune-A-Video-library/a-man-is-surfing'
-URL_TO_JOIN_MODEL_LIBRARY_ORG = 'https://huggingface.co/organizations/Tune-A-Video-library/share/YjTcaNJmKyeHFpMBioHhzBcTzCYddVErEk'
+MODEL_LIBRARY_ORG_NAME = "Tune-A-Video-library"
+SAMPLE_MODEL_REPO = "Tune-A-Video-library/a-man-is-surfing"
+URL_TO_JOIN_MODEL_LIBRARY_ORG = (
+    "https://huggingface.co/organizations/Tune-A-Video-library/share/YjTcaNJmKyeHFpMBioHhzBcTzCYddVErEk"
+)
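The only non-quoting change here is black wrapping the over-long URL assignment in parentheses; the two spellings are identical at runtime, as this sketch confirms:

```python
# Single-line form (what yapf left alone) vs. black's parenthesized form.
URL_A = "https://huggingface.co/organizations/Tune-A-Video-library/share/YjTcaNJmKyeHFpMBioHhzBcTzCYddVErEk"
URL_B = (
    "https://huggingface.co/organizations/Tune-A-Video-library/share/YjTcaNJmKyeHFpMBioHhzBcTzCYddVErEk"
)
assert URL_A == URL_B
```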
inference.py CHANGED
@@ -13,7 +13,7 @@ from diffusers.utils.import_utils import is_xformers_available
 from einops import rearrange
 from huggingface_hub import ModelCard
 
-sys.path.append('Tune-A-Video')
+sys.path.append("Tune-A-Video")
 
 from tuneavideo.models.unet import UNet3DConditionModel
 from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
@@ -23,8 +23,7 @@ class InferencePipeline:
     def __init__(self, hf_token: str | None = None):
         self.hf_token = hf_token
         self.pipe = None
-        self.device = torch.device(
-            'cuda:0' if torch.cuda.is_available() else 'cpu')
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         self.model_id = None
 
     def clear(self) -> None:
@@ -39,10 +38,9 @@ class InferencePipeline:
         return pathlib.Path(model_id).exists()
 
     @staticmethod
-    def get_model_card(model_id: str,
-                       hf_token: str | None = None) -> ModelCard:
+    def get_model_card(model_id: str, hf_token: str | None = None) -> ModelCard:
         if InferencePipeline.check_if_model_is_local(model_id):
-            card_path = (pathlib.Path(model_id) / 'README.md').as_posix()
+            card_path = (pathlib.Path(model_id) / "README.md").as_posix()
         else:
             card_path = model_id
         return ModelCard.load(card_path, token=hf_token)
@@ -57,14 +55,11 @@ class InferencePipeline:
             return
         base_model_id = self.get_base_model_info(model_id, self.hf_token)
         unet = UNet3DConditionModel.from_pretrained(
-            model_id,
-            subfolder='unet',
-            torch_dtype=torch.float16,
-            use_auth_token=self.hf_token)
-        pipe = TuneAVideoPipeline.from_pretrained(base_model_id,
-                                                  unet=unet,
-                                                  torch_dtype=torch.float16,
-                                                  use_auth_token=self.hf_token)
+            model_id, subfolder="unet", torch_dtype=torch.float16, use_auth_token=self.hf_token
+        )
+        pipe = TuneAVideoPipeline.from_pretrained(
+            base_model_id, unet=unet, torch_dtype=torch.float16, use_auth_token=self.hf_token
+        )
         pipe = pipe.to(self.device)
         if is_xformers_available():
             pipe.unet.enable_xformers_memory_efficient_attention()
@@ -82,7 +77,7 @@ class InferencePipeline:
         guidance_scale: float,
     ) -> PIL.Image.Image:
         if not torch.cuda.is_available():
-            raise gr.Error('CUDA is not available.')
+            raise gr.Error("CUDA is not available.")
 
         self.load_pipe(model_id)
 
@@ -97,10 +92,10 @@ class InferencePipeline:
             generator=generator,
         )  # type: ignore
 
-        frames = rearrange(out.videos[0], 'c t h w -> t h w c')
+        frames = rearrange(out.videos[0], "c t h w -> t h w c")
         frames = (frames * 255).to(torch.uint8).numpy()
 
-        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
         writer = imageio.get_writer(out_file.name, fps=fps)
         for frame in frames:
             writer.append_data(frame)
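The frame-export tail of `run()` is unchanged in substance. Isolated, the pattern looks like this (a sketch with a random tensor standing in for `out.videos[0]`; writing mp4 through imageio assumes the imageio-ffmpeg backend is installed):

```python
import tempfile

import imageio
import torch
from einops import rearrange

videos = torch.rand(3, 8, 64, 64)  # (c, t, h, w), stand-in for the pipeline output
frames = rearrange(videos, "c t h w -> t h w c")  # frame-major order for the writer
frames = (frames * 255).to(torch.uint8).numpy()

out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
writer = imageio.get_writer(out_file.name, fps=1)
for frame in frames:
    writer.append_data(frame)
writer.close()
print(out_file.name)
```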
trainer.py CHANGED
@@ -16,26 +16,24 @@ from omegaconf import OmegaConf
 from uploader import upload
 from utils import save_model_card
 
-sys.path.append('Tune-A-Video')
+sys.path.append("Tune-A-Video")
 
 
 class Trainer:
     def __init__(self):
-        self.checkpoint_dir = pathlib.Path('checkpoints')
+        self.checkpoint_dir = pathlib.Path("checkpoints")
         self.checkpoint_dir.mkdir(exist_ok=True)
 
-        self.log_file = pathlib.Path('log.txt')
+        self.log_file = pathlib.Path("log.txt")
         self.log_file.touch(exist_ok=True)
 
     def download_base_model(self, base_model_id: str) -> str:
         model_dir = self.checkpoint_dir / base_model_id
         if not model_dir.exists():
-            org_name = base_model_id.split('/')[0]
+            org_name = base_model_id.split("/")[0]
             org_dir = self.checkpoint_dir / org_name
             org_dir.mkdir(exist_ok=True)
-            subprocess.run(shlex.split(
-                f'git clone https://huggingface.co/{base_model_id}'),
-                           cwd=org_dir)
+            subprocess.run(shlex.split(f"git clone https://huggingface.co/{base_model_id}"), cwd=org_dir)
         return model_dir.as_posix()
 
     def run(
@@ -63,28 +61,28 @@ class Trainer:
         hf_token: str,
     ) -> None:
         if not torch.cuda.is_available():
-            raise RuntimeError('CUDA is not available.')
+            raise RuntimeError("CUDA is not available.")
         if training_video is None:
-            raise ValueError('You need to upload a video.')
+            raise ValueError("You need to upload a video.")
         if not training_prompt:
-            raise ValueError('The training prompt is missing.')
+            raise ValueError("The training prompt is missing.")
         if not validation_prompt:
-            raise ValueError('The validation prompt is missing.')
+            raise ValueError("The validation prompt is missing.")
 
         resolution = int(resolution_s)
 
         if not output_model_name:
-            timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
-            output_model_name = f'tune-a-video-{timestamp}'
+            timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+            output_model_name = f"tune-a-video-{timestamp}"
         output_model_name = slugify.slugify(output_model_name)
 
         repo_dir = pathlib.Path(__file__).parent
-        output_dir = repo_dir / 'experiments' / output_model_name
+        output_dir = repo_dir / "experiments" / output_model_name
         if overwrite_existing_model or upload_to_hub:
             shutil.rmtree(output_dir, ignore_errors=True)
         output_dir.mkdir(parents=True)
 
-        config = OmegaConf.load('Tune-A-Video/configs/man-surfing.yaml')
+        config = OmegaConf.load("Tune-A-Video/configs/man-surfing.yaml")
         config.pretrained_model_path = self.download_base_model(base_model)
         config.output_dir = output_dir.as_posix()
         config.train_data.video_path = training_video.name  # type: ignore
@@ -107,39 +105,40 @@ class Trainer:
         config.checkpointing_steps = checkpointing_steps
         config.validation_steps = validation_epochs
         config.seed = seed
-        config.mixed_precision = 'fp16' if fp16 else ''
+        config.mixed_precision = "fp16" if fp16 else ""
         config.use_8bit_adam = use_8bit_adam
 
-        config_path = output_dir / 'config.yaml'
-        with open(config_path, 'w') as f:
+        config_path = output_dir / "config.yaml"
+        with open(config_path, "w") as f:
             OmegaConf.save(config, f)
 
-        command = f'accelerate launch Tune-A-Video/train_tuneavideo.py --config {config_path}'
-        with open(self.log_file, 'w') as f:
-            subprocess.run(shlex.split(command),
-                           stdout=f,
-                           stderr=subprocess.STDOUT,
-                           text=True)
-        save_model_card(save_dir=output_dir,
-                        base_model=base_model,
-                        training_prompt=training_prompt,
-                        test_prompt=validation_prompt,
-                        test_image_dir='samples')
-
-        with open(self.log_file, 'a') as f:
-            f.write('Training completed!\n')
+        command = f"accelerate launch Tune-A-Video/train_tuneavideo.py --config {config_path}"
+        with open(self.log_file, "w") as f:
+            subprocess.run(shlex.split(command), stdout=f, stderr=subprocess.STDOUT, text=True)
+        save_model_card(
+            save_dir=output_dir,
+            base_model=base_model,
+            training_prompt=training_prompt,
+            test_prompt=validation_prompt,
+            test_image_dir="samples",
+        )
+
+        with open(self.log_file, "a") as f:
+            f.write("Training completed!\n")
 
         if upload_to_hub:
-            upload_message = upload(local_folder_path=output_dir.as_posix(),
-                                    target_repo_name=output_model_name,
-                                    upload_to=upload_to,
-                                    private=use_private_repo,
-                                    delete_existing_repo=delete_existing_repo,
-                                    hf_token=hf_token)
-            with open(self.log_file, 'a') as f:
+            upload_message = upload(
+                local_folder_path=output_dir.as_posix(),
+                target_repo_name=output_model_name,
+                upload_to=upload_to,
+                private=use_private_repo,
+                delete_existing_repo=delete_existing_repo,
+                hf_token=hf_token,
+            )
+            with open(self.log_file, "a") as f:
                 f.write(upload_message)
 
         if pause_space_after_training:
-            if space_id := os.getenv('SPACE_ID'):
-                api = HfApi(token=os.getenv('HF_TOKEN') or hf_token)
+            if space_id := os.getenv("SPACE_ID"):
+                api = HfApi(token=os.getenv("HF_TOKEN") or hf_token)
                 api.pause_space(repo_id=space_id)
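The launch-and-log pattern black compacts in `run()` is worth seeing on its own: `shlex.split` keeps quoted arguments intact, and handing an open file to `stdout` streams the subprocess output into the log the UI tails. A runnable sketch with a harmless command:

```python
import shlex
import subprocess

command = "python -c 'print(\"hello from the subprocess\")'"
with open("log.txt", "w") as f:
    # stderr is folded into stdout so the log also captures tracebacks.
    subprocess.run(shlex.split(command), stdout=f, stderr=subprocess.STDOUT, text=True)

print(open("log.txt").read())
```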
uploader.py CHANGED
@@ -8,24 +8,30 @@ import subprocess
 import slugify
 from huggingface_hub import HfApi
 
-from constants import (MODEL_LIBRARY_ORG_NAME, URL_TO_JOIN_MODEL_LIBRARY_ORG,
-                       UploadTarget)
+from constants import (
+    MODEL_LIBRARY_ORG_NAME,
+    URL_TO_JOIN_MODEL_LIBRARY_ORG,
+    UploadTarget,
+)
 
 
 def join_model_library_org(hf_token: str) -> None:
     subprocess.run(
         shlex.split(
             f'curl -X POST -H "Authorization: Bearer {hf_token}" -H "Content-Type: application/json" {URL_TO_JOIN_MODEL_LIBRARY_ORG}'
-        ))
+        )
+    )
 
 
-def upload(local_folder_path: str,
-           target_repo_name: str,
-           upload_to: str,
-           private: bool = True,
-           delete_existing_repo: bool = False,
-           hf_token: str = '') -> str:
-    hf_token = os.getenv('HF_TOKEN') or hf_token
+def upload(
+    local_folder_path: str,
+    target_repo_name: str,
+    upload_to: str,
+    private: bool = True,
+    delete_existing_repo: bool = False,
+    hf_token: str = "",
+) -> str:
+    hf_token = os.getenv("HF_TOKEN") or hf_token
     if not hf_token:
         raise ValueError
     api = HfApi(token=hf_token)
@@ -37,27 +43,24 @@ def upload(local_folder_path:
     target_repo_name = slugify.slugify(target_repo_name)
 
     if upload_to == UploadTarget.PERSONAL_PROFILE.value:
-        organization = api.whoami()['name']
+        organization = api.whoami()["name"]
     elif upload_to == UploadTarget.MODEL_LIBRARY.value:
         organization = MODEL_LIBRARY_ORG_NAME
         join_model_library_org(hf_token)
     else:
         raise ValueError
 
-    repo_id = f'{organization}/{target_repo_name}'
+    repo_id = f"{organization}/{target_repo_name}"
     if delete_existing_repo:
         try:
-            api.delete_repo(repo_id, repo_type='model')
+            api.delete_repo(repo_id, repo_type="model")
         except Exception:
             pass
     try:
-        api.create_repo(repo_id, repo_type='model', private=private)
-        api.upload_folder(repo_id=repo_id,
-                          folder_path=local_folder_path,
-                          path_in_repo='.',
-                          repo_type='model')
-        url = f'https://huggingface.co/{repo_id}'
-        message = f'Your model was successfully uploaded to {url}.'
+        api.create_repo(repo_id, repo_type="model", private=private)
+        api.upload_folder(repo_id=repo_id, folder_path=local_folder_path, path_in_repo=".", repo_type="model")
+        url = f"https://huggingface.co/{repo_id}"
+        message = f"Your model was successfully uploaded to {url}."
     except Exception as e:
         message = str(e)
     return message
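The upload flow itself survives the reformat: create the repo, push the folder, and surface either the URL or the exception text as the UI message. A hedged sketch (repo id, folder, and token are placeholders you would supply):

```python
from huggingface_hub import HfApi


def upload_folder_to_hub(repo_id: str, folder: str, token: str, private: bool = True) -> str:
    api = HfApi(token=token)
    try:
        api.create_repo(repo_id, repo_type="model", private=private)
        api.upload_folder(repo_id=repo_id, folder_path=folder, path_in_repo=".", repo_type="model")
        message = f"Your model was successfully uploaded to https://huggingface.co/{repo_id}."
    except Exception as e:  # shown verbatim in the output Markdown box
        message = str(e)
    return message
```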
utils.py CHANGED
@@ -5,14 +5,11 @@ import pathlib
 
 def find_exp_dirs() -> list[str]:
     repo_dir = pathlib.Path(__file__).parent
-    exp_root_dir = repo_dir / 'experiments'
+    exp_root_dir = repo_dir / "experiments"
     if not exp_root_dir.exists():
         return []
-    exp_dirs = sorted(exp_root_dir.glob('*'))
-    exp_dirs = [
-        exp_dir for exp_dir in exp_dirs
-        if (exp_dir / 'model_index.json').exists()
-    ]
+    exp_dirs = sorted(exp_root_dir.glob("*"))
+    exp_dirs = [exp_dir for exp_dir in exp_dirs if (exp_dir / "model_index.json").exists()]
     return [path.relative_to(repo_dir).as_posix() for path in exp_dirs]
 
 
@@ -20,21 +17,21 @@ def save_model_card(
     save_dir: pathlib.Path,
     base_model: str,
     training_prompt: str,
-    test_prompt: str = '',
-    test_image_dir: str = '',
+    test_prompt: str = "",
+    test_image_dir: str = "",
 ) -> None:
-    image_str = ''
+    image_str = ""
     if test_prompt and test_image_dir:
-        image_paths = sorted((save_dir / test_image_dir).glob('*.gif'))
+        image_paths = sorted((save_dir / test_image_dir).glob("*.gif"))
         if image_paths:
             image_path = image_paths[-1]
             rel_path = image_path.relative_to(save_dir)
-            image_str = f'''## Samples
+            image_str = f"""## Samples
 Test prompt: {test_prompt}
 
-![{image_path.stem}]({rel_path})'''
+![{image_path.stem}]({rel_path})"""
 
-    model_card = f'''---
+    model_card = f"""---
 license: creativeml-openrail-m
 base_model: {base_model}
 training_prompt: {training_prompt}
@@ -59,7 +56,7 @@ inference: false
 ## Related papers:
 - [Tune-A-Video](https://arxiv.org/abs/2212.11565): One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation
 - [Stable-Diffusion](https://arxiv.org/abs/2112.10752): High-Resolution Image Synthesis with Latent Diffusion Models
-'''
+"""
 
-    with open(save_dir / 'README.md', 'w') as f:
+    with open(save_dir / "README.md", "w") as f:
         f.write(model_card)