Philbotic and hysts (HF staff) committed
Commit e52b09c · 0 parents

Duplicate from Tune-A-Video-library/Tune-A-Video-Training-UI

Co-authored-by: hysts <hysts@users.noreply.huggingface.co>

Files changed (22)
  1. .gitattributes +35 -0
  2. .gitignore +164 -0
  3. .gitmodules +3 -0
  4. .pre-commit-config.yaml +37 -0
  5. .style.yapf +5 -0
  6. Dockerfile +57 -0
  7. LICENSE +21 -0
  8. README.md +12 -0
  9. Tune-A-Video +1 -0
  10. app.py +76 -0
  11. app_inference.py +170 -0
  12. app_training.py +140 -0
  13. app_upload.py +100 -0
  14. constants.py +10 -0
  15. inference.py +109 -0
  16. packages.txt +1 -0
  17. patch +15 -0
  18. requirements.txt +19 -0
  19. style.css +3 -0
  20. trainer.py +156 -0
  21. uploader.py +42 -0
  22. utils.py +65 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.whl filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,164 @@
+ checkpoints/
+ experiments/
+
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "Tune-A-Video"]
+ path = Tune-A-Video
+ url = https://github.com/showlab/Tune-A-Video
.pre-commit-config.yaml ADDED
@@ -0,0 +1,37 @@
+ exclude: patch
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.2.0
+ hooks:
+ - id: check-executables-have-shebangs
+ - id: check-json
+ - id: check-merge-conflict
+ - id: check-shebang-scripts-are-executable
+ - id: check-toml
+ - id: check-yaml
+ - id: double-quote-string-fixer
+ - id: end-of-file-fixer
+ - id: mixed-line-ending
+ args: ['--fix=lf']
+ - id: requirements-txt-fixer
+ - id: trailing-whitespace
+ - repo: https://github.com/myint/docformatter
+ rev: v1.4
+ hooks:
+ - id: docformatter
+ args: ['--in-place']
+ - repo: https://github.com/pycqa/isort
+ rev: 5.12.0
+ hooks:
+ - id: isort
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v0.991
+ hooks:
+ - id: mypy
+ args: ['--ignore-missing-imports']
+ additional_dependencies: ['types-python-slugify']
+ - repo: https://github.com/google/yapf
+ rev: v0.32.0
+ hooks:
+ - id: yapf
+ args: ['--parallel', '--in-place']
.style.yapf ADDED
@@ -0,0 +1,5 @@
+ [style]
+ based_on_style = pep8
+ blank_line_before_nested_class_or_def = false
+ spaces_before_comment = 2
+ split_before_logical_operator = true
Dockerfile ADDED
@@ -0,0 +1,57 @@
+ FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+ ENV DEBIAN_FRONTEND=noninteractive
+ RUN apt-get update && \
+ apt-get upgrade -y && \
+ apt-get install -y --no-install-recommends \
+ git \
+ git-lfs \
+ wget \
+ curl \
+ # ffmpeg \
+ ffmpeg \
+ x264 \
+ # python build dependencies \
+ build-essential \
+ libssl-dev \
+ zlib1g-dev \
+ libbz2-dev \
+ libreadline-dev \
+ libsqlite3-dev \
+ libncursesw5-dev \
+ xz-utils \
+ tk-dev \
+ libxml2-dev \
+ libxmlsec1-dev \
+ libffi-dev \
+ liblzma-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+ PATH=/home/user/.local/bin:${PATH}
+ WORKDIR ${HOME}/app
+
+ RUN curl https://pyenv.run | bash
+ ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ ENV PYTHON_VERSION=3.10.9
+ RUN pyenv install ${PYTHON_VERSION} && \
+ pyenv global ${PYTHON_VERSION} && \
+ pyenv rehash && \
+ pip install --no-cache-dir -U pip setuptools wheel
+
+ RUN pip install --no-cache-dir -U torch==1.13.1 torchvision==0.14.1
+ COPY --chown=1000 requirements.txt /tmp/requirements.txt
+ RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
+
+ COPY --chown=1000 . ${HOME}/app
+ RUN cd Tune-A-Video && patch -p1 < ../patch
+ ENV PYTHONPATH=${HOME}/app \
+ PYTHONUNBUFFERED=1 \
+ GRADIO_ALLOW_FLAGGING=never \
+ GRADIO_NUM_PORTS=1 \
+ GRADIO_SERVER_NAME=0.0.0.0 \
+ GRADIO_THEME=huggingface \
+ SYSTEM=spaces
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2022 hysts
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Tune-A-Video Training UI
+ emoji: ⚡
+ colorFrom: red
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ license: mit
+ duplicated_from: Tune-A-Video-library/Tune-A-Video-Training-UI
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Tune-A-Video ADDED
@@ -0,0 +1 @@
+ Subproject commit b2c8c3eeac0df5c5d9eccc4dd2153e17b83c638c
app.py ADDED
@@ -0,0 +1,76 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import os
+
+ import gradio as gr
+ import torch
+
+ from app_inference import create_inference_demo
+ from app_training import create_training_demo
+ from app_upload import create_upload_demo
+ from inference import InferencePipeline
+ from trainer import Trainer
+
+ TITLE = '# [Tune-A-Video](https://tuneavideo.github.io/) Training UI'
+
+ ORIGINAL_SPACE_ID = 'Tune-A-Video-library/Tune-A-Video-Training-UI'
+ SPACE_ID = os.getenv('SPACE_ID', ORIGINAL_SPACE_ID)
+ SHARED_UI_WARNING = f'''# Attention - This Space doesn't work in this shared UI. You can duplicate and use it with a paid private T4 GPU. (Please note that there seems to be an issue with training on the A10G GPU now. The model doesn't learn anything when trained on A10G. Training on T4 works perfectly fine and inference works fine on both.)
+
+ <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
+ '''
+
+ if os.getenv('SYSTEM') == 'spaces' and SPACE_ID != ORIGINAL_SPACE_ID:
+ SETTINGS = f'<a href="https://huggingface.co/spaces/{SPACE_ID}/settings">Settings</a>'
+ else:
+ SETTINGS = 'Settings'
+ CUDA_NOT_AVAILABLE_WARNING = f'''# Attention - Running on CPU.
+ <center>
+ You can assign a GPU in the {SETTINGS} tab if you are running this on HF Spaces.
+ You can use "T4 small/medium" or "A10G small/large" to run this demo.
+ </center>
+ '''
+
+ HF_TOKEN_NOT_SPECIFIED_WARNING = f'''# Attention - The environment variable `HF_TOKEN` is not specified. Please specify your Hugging Face token with write permission as the value of it.
+ <center>
+ You can check and create your Hugging Face tokens <a href="https://huggingface.co/settings/tokens" target="_blank">here</a>.
+ You can specify environment variables in the "Repository secrets" section of the {SETTINGS} tab.
+ </center>
+ '''
+
+ HF_TOKEN = os.getenv('HF_TOKEN')
+
+
+ def show_warning(warning_text: str) -> gr.Blocks:
+ with gr.Blocks() as demo:
+ with gr.Box():
+ gr.Markdown(warning_text)
+ return demo
+
+
+ pipe = InferencePipeline(HF_TOKEN)
+ trainer = Trainer(HF_TOKEN)
+
+ with gr.Blocks(css='style.css') as demo:
+ if os.getenv('IS_SHARED_UI'):
+ show_warning(SHARED_UI_WARNING)
+ if not torch.cuda.is_available():
+ show_warning(CUDA_NOT_AVAILABLE_WARNING)
+ if not HF_TOKEN:
+ show_warning(HF_TOKEN_NOT_SPECIFIED_WARNING)
+
+ gr.Markdown(TITLE)
+ with gr.Tabs():
+ with gr.TabItem('Train'):
+ create_training_demo(trainer, pipe)
+ with gr.TabItem('Test'):
+ create_inference_demo(pipe, HF_TOKEN)
+ with gr.TabItem('Upload'):
+ gr.Markdown('''
+ - You can use this tab to upload models later if you choose not to upload models in training time or if upload in training time failed.
+ ''')
+ create_upload_demo(HF_TOKEN)
+
+ demo.queue(max_size=1).launch(share=False)
app_inference.py ADDED
@@ -0,0 +1,170 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import enum
+
+ import gradio as gr
+ from huggingface_hub import HfApi
+
+ from constants import MODEL_LIBRARY_ORG_NAME, UploadTarget
+ from inference import InferencePipeline
+ from utils import find_exp_dirs
+
+
+ class ModelSource(enum.Enum):
+ HUB_LIB = UploadTarget.MODEL_LIBRARY.value
+ LOCAL = 'Local'
+
+
+ class InferenceUtil:
+ def __init__(self, hf_token: str | None):
+ self.hf_token = hf_token
+
+ def load_hub_model_list(self) -> dict:
+ api = HfApi(token=self.hf_token)
+ choices = [
+ info.modelId
+ for info in api.list_models(author=MODEL_LIBRARY_ORG_NAME)
+ ]
+ return gr.update(choices=choices,
+ value=choices[0] if choices else None)
+
+ @staticmethod
+ def load_local_model_list() -> dict:
+ choices = find_exp_dirs()
+ return gr.update(choices=choices,
+ value=choices[0] if choices else None)
+
+ def reload_model_list(self, model_source: str) -> dict:
+ if model_source == ModelSource.HUB_LIB.value:
+ return self.load_hub_model_list()
+ elif model_source == ModelSource.LOCAL.value:
+ return self.load_local_model_list()
+ else:
+ raise ValueError
+
+ def load_model_info(self, model_id: str) -> tuple[str, str]:
+ try:
+ card = InferencePipeline.get_model_card(model_id, self.hf_token)
+ except Exception:
+ return '', ''
+ base_model = getattr(card.data, 'base_model', '')
+ training_prompt = getattr(card.data, 'training_prompt', '')
+ return base_model, training_prompt
+
+ def reload_model_list_and_update_model_info(
+ self, model_source: str) -> tuple[dict, str, str]:
+ model_list_update = self.reload_model_list(model_source)
+ model_list = model_list_update['choices']
+ model_info = self.load_model_info(model_list[0] if model_list else '')
+ return model_list_update, *model_info
+
+
+ def create_inference_demo(pipe: InferencePipeline,
+ hf_token: str | None = None) -> gr.Blocks:
+ app = InferenceUtil(hf_token)
+
+ with gr.Blocks() as demo:
+ with gr.Row():
+ with gr.Column():
+ with gr.Box():
+ model_source = gr.Radio(
+ label='Model Source',
+ choices=[_.value for _ in ModelSource],
+ value=ModelSource.HUB_LIB.value)
+ reload_button = gr.Button('Reload Model List')
+ model_id = gr.Dropdown(label='Model ID',
+ choices=None,
+ value=None)
+ with gr.Accordion(
+ label=
+ 'Model info (Base model and prompt used for training)',
+ open=False):
+ with gr.Row():
+ base_model_used_for_training = gr.Text(
+ label='Base model', interactive=False)
+ prompt_used_for_training = gr.Text(
+ label='Training prompt', interactive=False)
+ prompt = gr.Textbox(
+ label='Prompt',
+ max_lines=1,
+ placeholder='Example: "A panda is surfing"')
+ video_length = gr.Slider(label='Video length',
+ minimum=4,
+ maximum=12,
+ step=1,
+ value=8)
+ fps = gr.Slider(label='FPS',
+ minimum=1,
+ maximum=12,
+ step=1,
+ value=1)
+ seed = gr.Slider(label='Seed',
+ minimum=0,
+ maximum=100000,
+ step=1,
+ value=0)
+ with gr.Accordion('Other Parameters', open=False):
+ num_steps = gr.Slider(label='Number of Steps',
+ minimum=0,
+ maximum=100,
+ step=1,
+ value=50)
+ guidance_scale = gr.Slider(label='CFG Scale',
+ minimum=0,
+ maximum=50,
+ step=0.1,
+ value=7.5)
+
+ run_button = gr.Button('Generate')
+
+ gr.Markdown('''
+ - After training, you can press "Reload Model List" button to load your trained model names.
+ - It takes a few minutes to download model first.
+ - Expected time to generate an 8-frame video: 70 seconds with T4, 24 seconds with A10G, (10 seconds with A100)
+ ''')
+ with gr.Column():
+ result = gr.Video(label='Result')
+
+ model_source.change(fn=app.reload_model_list_and_update_model_info,
+ inputs=model_source,
+ outputs=[
+ model_id,
+ base_model_used_for_training,
+ prompt_used_for_training,
+ ])
+ reload_button.click(fn=app.reload_model_list_and_update_model_info,
+ inputs=model_source,
+ outputs=[
+ model_id,
+ base_model_used_for_training,
+ prompt_used_for_training,
+ ])
+ model_id.change(fn=app.load_model_info,
+ inputs=model_id,
+ outputs=[
+ base_model_used_for_training,
+ prompt_used_for_training,
+ ])
+ inputs = [
+ model_id,
+ prompt,
+ video_length,
+ fps,
+ seed,
+ num_steps,
+ guidance_scale,
+ ]
+ prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
+ run_button.click(fn=pipe.run, inputs=inputs, outputs=result)
+ return demo
+
+
+ if __name__ == '__main__':
+ import os
+
+ hf_token = os.getenv('HF_TOKEN')
+ pipe = InferencePipeline(hf_token)
+ demo = create_inference_demo(pipe, hf_token)
+ demo.queue(max_size=10).launch(share=False)
app_training.py ADDED
@@ -0,0 +1,140 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import os
+
+ import gradio as gr
+
+ from constants import MODEL_LIBRARY_ORG_NAME, SAMPLE_MODEL_REPO, UploadTarget
+ from inference import InferencePipeline
+ from trainer import Trainer
+
+
+ def create_training_demo(trainer: Trainer,
+ pipe: InferencePipeline | None = None) -> gr.Blocks:
+ with gr.Blocks() as demo:
+ with gr.Row():
+ with gr.Column():
+ with gr.Box():
+ gr.Markdown('Training Data')
+ training_video = gr.File(label='Training video')
+ training_prompt = gr.Textbox(
+ label='Training prompt',
+ max_lines=1,
+ placeholder='A man is surfing')
+ gr.Markdown('''
+ - Upload a video and write a prompt describing the video.
+ ''')
+ with gr.Box():
+ gr.Markdown('Output Model')
+ output_model_name = gr.Text(label='Name of your model',
+ max_lines=1)
+ delete_existing_model = gr.Checkbox(
+ label='Delete existing model of the same name',
+ value=False)
+ validation_prompt = gr.Text(label='Validation Prompt')
+ with gr.Box():
+ gr.Markdown('Upload Settings')
+ with gr.Row():
+ upload_to_hub = gr.Checkbox(
+ label='Upload model to Hub', value=True)
+ use_private_repo = gr.Checkbox(label='Private',
+ value=True)
+ delete_existing_repo = gr.Checkbox(
+ label='Delete existing repo of the same name',
+ value=False)
+ upload_to = gr.Radio(
+ label='Upload to',
+ choices=[_.value for _ in UploadTarget],
+ value=UploadTarget.MODEL_LIBRARY.value)
+ gr.Markdown(f'''
+ - By default, trained models will be uploaded to [Tune-A-Video Library](https://huggingface.co/{MODEL_LIBRARY_ORG_NAME}) (see [this example model](https://huggingface.co/{SAMPLE_MODEL_REPO})).
+ - You can also choose "Personal Profile", in which case, the model will be uploaded to https://huggingface.co/{{your_username}}/{{model_name}}.
+ ''')
+
+ with gr.Box():
+ gr.Markdown('Training Parameters')
+ with gr.Row():
+ base_model = gr.Text(label='Base Model',
+ value='CompVis/stable-diffusion-v1-4',
+ max_lines=1)
+ resolution = gr.Dropdown(choices=['512', '768'],
+ value='512',
+ label='Resolution',
+ visible=False)
+ num_training_steps = gr.Number(
+ label='Number of Training Steps', value=300, precision=0)
+ learning_rate = gr.Number(label='Learning Rate',
+ value=0.000035)
+ gradient_accumulation = gr.Number(
+ label='Number of Gradient Accumulation',
+ value=1,
+ precision=0)
+ seed = gr.Slider(label='Seed',
+ minimum=0,
+ maximum=100000,
+ step=1,
+ value=0)
+ fp16 = gr.Checkbox(label='FP16', value=True)
+ use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=False)
+ checkpointing_steps = gr.Number(label='Checkpointing Steps',
+ value=1000,
+ precision=0)
+ validation_epochs = gr.Number(label='Validation Epochs',
+ value=100,
+ precision=0)
+ gr.Markdown('''
+ - The base model must be a model that is compatible with [diffusers](https://github.com/huggingface/diffusers) library.
+ - It takes a few minutes to download the base model first.
+ - Expected time to train a model for 300 steps: 20 minutes with T4, 8 minutes with A10G, (4 minutes with A100)
+ - It takes a few minutes to upload your trained model.
+ - You may want to try a small number of steps first, like 1, to see if everything works fine in your environment.
+ - You can check the training status by pressing the "Open logs" button if you are running this on your Space.
+ ''')
+
+ remove_gpu_after_training = gr.Checkbox(
+ label='Remove GPU after training',
+ value=False,
+ interactive=bool(os.getenv('SPACE_ID')),
+ visible=False)
+ run_button = gr.Button('Start Training')
+
+ with gr.Box():
+ gr.Markdown('Output message')
+ output_message = gr.Markdown()
+
+ if pipe is not None:
+ run_button.click(fn=pipe.clear)
+ run_button.click(fn=trainer.run,
+ inputs=[
+ training_video,
+ training_prompt,
+ output_model_name,
+ delete_existing_model,
+ validation_prompt,
+ base_model,
+ resolution,
+ num_training_steps,
+ learning_rate,
+ gradient_accumulation,
+ seed,
+ fp16,
+ use_8bit_adam,
+ checkpointing_steps,
+ validation_epochs,
+ upload_to_hub,
+ use_private_repo,
+ delete_existing_repo,
+ upload_to,
+ remove_gpu_after_training,
+ ],
+ outputs=output_message)
+ return demo
+
+
+ if __name__ == '__main__':
+ hf_token = os.getenv('HF_TOKEN')
+ trainer = Trainer(hf_token)
+ demo = create_training_demo(trainer)
+ demo.queue(max_size=1).launch(share=False)
app_upload.py ADDED
@@ -0,0 +1,100 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import pathlib
+
+ import gradio as gr
+ import slugify
+
+ from constants import MODEL_LIBRARY_ORG_NAME, UploadTarget
+ from uploader import Uploader
+ from utils import find_exp_dirs
+
+
+ class ModelUploader(Uploader):
+ def upload_model(
+ self,
+ folder_path: str,
+ repo_name: str,
+ upload_to: str,
+ private: bool,
+ delete_existing_repo: bool,
+ ) -> str:
+ if not folder_path:
+ raise ValueError
+ if not repo_name:
+ repo_name = pathlib.Path(folder_path).name
+ repo_name = slugify.slugify(repo_name)
+
+ if upload_to == UploadTarget.PERSONAL_PROFILE.value:
+ organization = ''
+ elif upload_to == UploadTarget.MODEL_LIBRARY.value:
+ organization = MODEL_LIBRARY_ORG_NAME
+ else:
+ raise ValueError
+
+ return self.upload(folder_path,
+ repo_name,
+ organization=organization,
+ private=private,
+ delete_existing_repo=delete_existing_repo)
+
+
+ def load_local_model_list() -> dict:
+ choices = find_exp_dirs()
+ return gr.update(choices=choices, value=choices[0] if choices else None)
+
+
+ def create_upload_demo(hf_token: str | None) -> gr.Blocks:
+ uploader = ModelUploader(hf_token)
+ model_dirs = find_exp_dirs()
+
+ with gr.Blocks() as demo:
+ with gr.Box():
+ gr.Markdown('Local Models')
+ reload_button = gr.Button('Reload Model List')
+ model_dir = gr.Dropdown(
+ label='Model names',
+ choices=model_dirs,
+ value=model_dirs[0] if model_dirs else None)
+ with gr.Box():
+ gr.Markdown('Upload Settings')
+ with gr.Row():
+ use_private_repo = gr.Checkbox(label='Private', value=True)
+ delete_existing_repo = gr.Checkbox(
+ label='Delete existing repo of the same name', value=False)
+ upload_to = gr.Radio(label='Upload to',
+ choices=[_.value for _ in UploadTarget],
+ value=UploadTarget.MODEL_LIBRARY.value)
+ model_name = gr.Textbox(label='Model Name')
+ upload_button = gr.Button('Upload')
+ gr.Markdown(f'''
+ - You can upload your trained model to your personal profile (i.e. https://huggingface.co/{{your_username}}/{{model_name}}) or to the public [Tune-A-Video Library](https://huggingface.co/{MODEL_LIBRARY_ORG_NAME}) (i.e. https://huggingface.co/{MODEL_LIBRARY_ORG_NAME}/{{model_name}}).
+ ''')
+ with gr.Box():
+ gr.Markdown('Output message')
+ output_message = gr.Markdown()
+
+ reload_button.click(fn=load_local_model_list,
+ inputs=None,
+ outputs=model_dir)
+ upload_button.click(fn=uploader.upload_model,
+ inputs=[
+ model_dir,
+ model_name,
+ upload_to,
+ use_private_repo,
+ delete_existing_repo,
+ ],
+ outputs=output_message)
+
+ return demo
+
+
+ if __name__ == '__main__':
+ import os
+
+ hf_token = os.getenv('HF_TOKEN')
+ demo = create_upload_demo(hf_token)
+ demo.queue(max_size=1).launch(share=False)
constants.py ADDED
@@ -0,0 +1,10 @@
+ import enum
+
+
+ class UploadTarget(enum.Enum):
+ PERSONAL_PROFILE = 'Personal Profile'
+ MODEL_LIBRARY = 'Tune-A-Video Library'
+
+
+ MODEL_LIBRARY_ORG_NAME = 'Tune-A-Video-library'
+ SAMPLE_MODEL_REPO = 'Tune-A-Video-library/a-man-is-surfing'
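
These constants are shared by the training, inference, and upload tabs: the UploadTarget values populate the "Upload to" radio buttons, and MODEL_LIBRARY_ORG_NAME selects the Hub organization. A minimal sketch of how the enum is matched elsewhere in this commit (the target string here is just an illustrative value):

    from constants import MODEL_LIBRARY_ORG_NAME, UploadTarget

    target = 'Tune-A-Video Library'  # value coming from the Gradio radio button
    if target == UploadTarget.MODEL_LIBRARY.value:
        organization = MODEL_LIBRARY_ORG_NAME  # upload under the shared org
    else:
        organization = ''  # fall back to the user's own profile
    print(organization)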
inference.py ADDED
@@ -0,0 +1,109 @@
+ from __future__ import annotations
+
+ import gc
+ import pathlib
+ import sys
+ import tempfile
+
+ import gradio as gr
+ import imageio
+ import PIL.Image
+ import torch
+ from diffusers.utils.import_utils import is_xformers_available
+ from einops import rearrange
+ from huggingface_hub import ModelCard
+
+ sys.path.append('Tune-A-Video')
+
+ from tuneavideo.models.unet import UNet3DConditionModel
+ from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
+
+
+ class InferencePipeline:
+ def __init__(self, hf_token: str | None = None):
+ self.hf_token = hf_token
+ self.pipe = None
+ self.device = torch.device(
+ 'cuda:0' if torch.cuda.is_available() else 'cpu')
+ self.model_id = None
+
+ def clear(self) -> None:
+ self.model_id = None
+ del self.pipe
+ self.pipe = None
+ torch.cuda.empty_cache()
+ gc.collect()
+
+ @staticmethod
+ def check_if_model_is_local(model_id: str) -> bool:
+ return pathlib.Path(model_id).exists()
+
+ @staticmethod
+ def get_model_card(model_id: str,
+ hf_token: str | None = None) -> ModelCard:
+ if InferencePipeline.check_if_model_is_local(model_id):
+ card_path = (pathlib.Path(model_id) / 'README.md').as_posix()
+ else:
+ card_path = model_id
+ return ModelCard.load(card_path, token=hf_token)
+
+ @staticmethod
+ def get_base_model_info(model_id: str, hf_token: str | None = None) -> str:
+ card = InferencePipeline.get_model_card(model_id, hf_token)
+ return card.data.base_model
+
+ def load_pipe(self, model_id: str) -> None:
+ if model_id == self.model_id:
+ return
+ base_model_id = self.get_base_model_info(model_id, self.hf_token)
+ unet = UNet3DConditionModel.from_pretrained(
+ model_id,
+ subfolder='unet',
+ torch_dtype=torch.float16,
+ use_auth_token=self.hf_token)
+ pipe = TuneAVideoPipeline.from_pretrained(base_model_id,
+ unet=unet,
+ torch_dtype=torch.float16,
+ use_auth_token=self.hf_token)
+ pipe = pipe.to(self.device)
+ if is_xformers_available():
+ pipe.unet.enable_xformers_memory_efficient_attention()
+ self.pipe = pipe
+ self.model_id = model_id # type: ignore
+
+ def run(
+ self,
+ model_id: str,
+ prompt: str,
+ video_length: int,
+ fps: int,
+ seed: int,
+ n_steps: int,
+ guidance_scale: float,
+ ) -> PIL.Image.Image:
+ if not torch.cuda.is_available():
+ raise gr.Error('CUDA is not available.')
+
+ self.load_pipe(model_id)
+
+ generator = torch.Generator(device=self.device).manual_seed(seed)
+ out = self.pipe(
+ prompt,
+ video_length=video_length,
+ width=512,
+ height=512,
+ num_inference_steps=n_steps,
+ guidance_scale=guidance_scale,
+ generator=generator,
+ ) # type: ignore
+
+ frames = rearrange(out.videos[0], 'c t h w -> t h w c')
+ frames = (frames * 255).to(torch.uint8).numpy()
+
+ out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+ writer = imageio.get_writer(out_file.name, fps=fps)
+ for frame in frames:
+ writer.append_data(frame)
+ writer.close()
+
+ return out_file.name
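
The Gradio callbacks in app_inference.py pass the widget values straight to InferencePipeline.run, so the pipeline can also be driven from a plain script. A minimal sketch, assuming a CUDA GPU is available, the Tune-A-Video submodule is on the path, and the sample repo named in constants.py is reachable with your token:

    import os
    from inference import InferencePipeline

    pipe = InferencePipeline(hf_token=os.getenv('HF_TOKEN'))
    # Argument order mirrors the sliders wired up in app_inference.py:
    # model_id, prompt, video_length, fps, seed, n_steps, guidance_scale.
    video_path = pipe.run('Tune-A-Video-library/a-man-is-surfing',
                          'a panda is surfing', 8, 1, 0, 50, 7.5)
    print(video_path)  # path to a temporary .mp4 file written by imageio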
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
patch ADDED
@@ -0,0 +1,15 @@
+ diff --git a/train_tuneavideo.py b/train_tuneavideo.py
+ index 66d51b2..86b2a5d 100644
+ --- a/train_tuneavideo.py
+ +++ b/train_tuneavideo.py
+ @@ -94,8 +94,8 @@ def main(
+
+ # Handle the output folder creation
+ if accelerator.is_main_process:
+ - now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
+ - output_dir = os.path.join(output_dir, now)
+ + #now = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
+ + #output_dir = os.path.join(output_dir, now)
+ os.makedirs(output_dir, exist_ok=True)
+ OmegaConf.save(config, os.path.join(output_dir, 'config.yaml'))
+
requirements.txt ADDED
@@ -0,0 +1,19 @@
+ accelerate==0.15.0
+ bitsandbytes==0.35.4
+ decord==0.6.0
+ diffusers[torch]==0.11.1
+ einops==0.6.0
+ ftfy==6.1.1
+ gradio==3.16.2
+ huggingface-hub==0.12.0
+ imageio==2.25.0
+ imageio-ffmpeg==0.4.8
+ omegaconf==2.3.0
+ Pillow==9.4.0
+ python-slugify==7.0.0
+ tensorboard==2.11.2
+ torch==1.13.1
+ torchvision==0.14.1
+ transformers==4.26.0
+ triton==2.0.0.dev20221202
+ xformers==0.0.16
style.css ADDED
@@ -0,0 +1,3 @@
+ h1 {
+ text-align: center;
+ }
trainer.py ADDED
@@ -0,0 +1,156 @@
+ from __future__ import annotations
+
+ import datetime
+ import os
+ import pathlib
+ import shlex
+ import shutil
+ import subprocess
+ import sys
+
+ import gradio as gr
+ import slugify
+ import torch
+ from huggingface_hub import HfApi
+ from omegaconf import OmegaConf
+
+ from app_upload import ModelUploader
+ from utils import save_model_card
+
+ sys.path.append('Tune-A-Video')
+
+ URL_TO_JOIN_MODEL_LIBRARY_ORG = 'https://huggingface.co/organizations/Tune-A-Video-library/share/YjTcaNJmKyeHFpMBioHhzBcTzCYddVErEk'
+
+
+ class Trainer:
+ def __init__(self, hf_token: str | None = None):
+ self.hf_token = hf_token
+ self.api = HfApi(token=hf_token)
+ self.model_uploader = ModelUploader(hf_token)
+
+ self.checkpoint_dir = pathlib.Path('checkpoints')
+ self.checkpoint_dir.mkdir(exist_ok=True)
+
+ def download_base_model(self, base_model_id: str) -> str:
+ model_dir = self.checkpoint_dir / base_model_id
+ if not model_dir.exists():
+ org_name = base_model_id.split('/')[0]
+ org_dir = self.checkpoint_dir / org_name
+ org_dir.mkdir(exist_ok=True)
+ subprocess.run(shlex.split(
+ f'git clone https://huggingface.co/{base_model_id}'),
+ cwd=org_dir)
+ return model_dir.as_posix()
+
+ def join_model_library_org(self) -> None:
+ subprocess.run(
+ shlex.split(
+ f'curl -X POST -H "Authorization: Bearer {self.hf_token}" -H "Content-Type: application/json" {URL_TO_JOIN_MODEL_LIBRARY_ORG}'
+ ))
+
+ def run(
+ self,
+ training_video: str,
+ training_prompt: str,
+ output_model_name: str,
+ overwrite_existing_model: bool,
+ validation_prompt: str,
+ base_model: str,
+ resolution_s: str,
+ n_steps: int,
+ learning_rate: float,
+ gradient_accumulation: int,
+ seed: int,
+ fp16: bool,
+ use_8bit_adam: bool,
+ checkpointing_steps: int,
+ validation_epochs: int,
+ upload_to_hub: bool,
+ use_private_repo: bool,
+ delete_existing_repo: bool,
+ upload_to: str,
+ remove_gpu_after_training: bool,
+ ) -> str:
+ if not torch.cuda.is_available():
+ raise gr.Error('CUDA is not available.')
+ if training_video is None:
+ raise gr.Error('You need to upload a video.')
+ if not training_prompt:
+ raise gr.Error('The training prompt is missing.')
+ if not validation_prompt:
+ raise gr.Error('The validation prompt is missing.')
+
+ resolution = int(resolution_s)
+
+ if not output_model_name:
+ timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
+ output_model_name = f'tune-a-video-{timestamp}'
+ output_model_name = slugify.slugify(output_model_name)
+
+ repo_dir = pathlib.Path(__file__).parent
+ output_dir = repo_dir / 'experiments' / output_model_name
+ if overwrite_existing_model or upload_to_hub:
+ shutil.rmtree(output_dir, ignore_errors=True)
+ output_dir.mkdir(parents=True)
+
+ if upload_to_hub:
+ self.join_model_library_org()
+
+ config = OmegaConf.load('Tune-A-Video/configs/man-surfing.yaml')
+ config.pretrained_model_path = self.download_base_model(base_model)
+ config.output_dir = output_dir.as_posix()
+ config.train_data.video_path = training_video.name # type: ignore
+ config.train_data.prompt = training_prompt
+ config.train_data.n_sample_frames = 8
+ config.train_data.width = resolution
+ config.train_data.height = resolution
+ config.train_data.sample_start_idx = 0
+ config.train_data.sample_frame_rate = 1
+ config.validation_data.prompts = [validation_prompt]
+ config.validation_data.video_length = 8
+ config.validation_data.width = resolution
+ config.validation_data.height = resolution
+ config.validation_data.num_inference_steps = 50
+ config.validation_data.guidance_scale = 7.5
+ config.learning_rate = learning_rate
+ config.gradient_accumulation_steps = gradient_accumulation
+ config.train_batch_size = 1
+ config.max_train_steps = n_steps
+ config.checkpointing_steps = checkpointing_steps
+ config.validation_steps = validation_epochs
+ config.seed = seed
+ config.mixed_precision = 'fp16' if fp16 else ''
+ config.use_8bit_adam = use_8bit_adam
+
+ config_path = output_dir / 'config.yaml'
+ with open(config_path, 'w') as f:
+ OmegaConf.save(config, f)
+
+ command = f'accelerate launch Tune-A-Video/train_tuneavideo.py --config {config_path}'
+ subprocess.run(shlex.split(command))
+ save_model_card(save_dir=output_dir,
+ base_model=base_model,
+ training_prompt=training_prompt,
+ test_prompt=validation_prompt,
+ test_image_dir='samples')
+
+ message = 'Training completed!'
+ print(message)
+
+ if upload_to_hub:
+ upload_message = self.model_uploader.upload_model(
+ folder_path=output_dir.as_posix(),
+ repo_name=output_model_name,
+ upload_to=upload_to,
+ private=use_private_repo,
+ delete_existing_repo=delete_existing_repo)
+ print(upload_message)
+ message = message + '\n' + upload_message
+
+ if remove_gpu_after_training:
+ space_id = os.getenv('SPACE_ID')
+ if space_id:
+ self.api.request_space_hardware(repo_id=space_id,
+ hardware='cpu-basic')
+
+ return message
uploader.py ADDED
@@ -0,0 +1,42 @@
+ from __future__ import annotations
+
+ from huggingface_hub import HfApi
+
+
+ class Uploader:
+ def __init__(self, hf_token: str | None):
+ self.api = HfApi(token=hf_token)
+
+ def get_username(self) -> str:
+ return self.api.whoami()['name']
+
+ def upload(self,
+ folder_path: str,
+ repo_name: str,
+ organization: str = '',
+ repo_type: str = 'model',
+ private: bool = True,
+ delete_existing_repo: bool = False) -> str:
+ if not folder_path:
+ raise ValueError
+ if not repo_name:
+ raise ValueError
+ if not organization:
+ organization = self.get_username()
+ repo_id = f'{organization}/{repo_name}'
+ if delete_existing_repo:
+ try:
+ self.api.delete_repo(repo_id, repo_type=repo_type)
+ except Exception:
+ pass
+ try:
+ self.api.create_repo(repo_id, repo_type=repo_type, private=private)
+ self.api.upload_folder(repo_id=repo_id,
+ folder_path=folder_path,
+ path_in_repo='.',
+ repo_type=repo_type)
+ url = f'https://huggingface.co/{repo_id}'
+ message = f'Your model was successfully uploaded to <a href="{url}" target="_blank">{url}</a>.'
+ except Exception as e:
+ message = str(e)
+ return message
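
Uploader wraps the huggingface_hub repo calls, and ModelUploader in app_upload.py only adds name slugification and target selection on top of it. A minimal sketch of a direct call, assuming HF_TOKEN has write access (the folder path and repo name are illustrative):

    import os
    from uploader import Uploader

    uploader = Uploader(os.getenv('HF_TOKEN'))
    # Creates <username>/my-tune-a-video-model as a private repo and uploads the folder;
    # the returned string is the status message shown in the Gradio UI.
    print(uploader.upload('experiments/my-tune-a-video-model',
                          'my-tune-a-video-model',
                          private=True))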
utils.py ADDED
@@ -0,0 +1,65 @@
+ from __future__ import annotations
+
+ import pathlib
+
+
+ def find_exp_dirs() -> list[str]:
+ repo_dir = pathlib.Path(__file__).parent
+ exp_root_dir = repo_dir / 'experiments'
+ if not exp_root_dir.exists():
+ return []
+ exp_dirs = sorted(exp_root_dir.glob('*'))
+ exp_dirs = [
+ exp_dir for exp_dir in exp_dirs
+ if (exp_dir / 'model_index.json').exists()
+ ]
+ return [path.relative_to(repo_dir).as_posix() for path in exp_dirs]
+
+
+ def save_model_card(
+ save_dir: pathlib.Path,
+ base_model: str,
+ training_prompt: str,
+ test_prompt: str = '',
+ test_image_dir: str = '',
+ ) -> None:
+ image_str = ''
+ if test_prompt and test_image_dir:
+ image_paths = sorted((save_dir / test_image_dir).glob('*.gif'))
+ if image_paths:
+ image_path = image_paths[-1]
+ rel_path = image_path.relative_to(save_dir)
+ image_str = f'''## Samples
+ Test prompt: {test_prompt}
+
+ ![{image_path.stem}]({rel_path})'''
+
+ model_card = f'''---
+ license: creativeml-openrail-m
+ base_model: {base_model}
+ training_prompt: {training_prompt}
+ tags:
+ - stable-diffusion
+ - stable-diffusion-diffusers
+ - text-to-image
+ - diffusers
+ - text-to-video
+ - tune-a-video
+ inference: false
+ ---
+
+ # Tune-A-Video - {save_dir.name}
+
+ ## Model description
+ - Base model: [{base_model}](https://huggingface.co/{base_model})
+ - Training prompt: {training_prompt}
+
+ {image_str}
+
+ ## Related papers:
+ - [Tune-A-Video](https://arxiv.org/abs/2212.11565): One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation
+ - [Stable-Diffusion](https://arxiv.org/abs/2112.10752): High-Resolution Image Synthesis with Latent Diffusion Models
+ '''
+
+ with open(save_dir / 'README.md', 'w') as f:
+ f.write(model_card)
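
find_exp_dirs only lists experiment folders that already contain a model_index.json, and save_model_card writes the README.md whose base_model and training_prompt fields are later read back by InferencePipeline.get_model_card. A small sketch, assuming an experiments/my-model directory exists (the names are illustrative):

    import pathlib
    from utils import find_exp_dirs, save_model_card

    save_model_card(save_dir=pathlib.Path('experiments/my-model'),
                    base_model='CompVis/stable-diffusion-v1-4',
                    training_prompt='A man is surfing')
    # Returns e.g. ['experiments/my-model'] once training has written model_index.json.
    print(find_exp_dirs())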