hysts committed
Commit 6992dd6 • 1 Parent(s): 8aac645

Update for HF Space

Files changed (5)
  1. README.md +2 -1
  2. app.py +16 -24
  3. model.py +60 -3
  4. requirements.txt +3 -6
  5. prompt.txt → samples.txt +0 -0
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 🌍
 colorFrom: indigo
 colorTo: yellow
 sdk: gradio
-sdk_version: 3.0.26
+sdk_version: 3.1.0
+python_version: 3.9.13
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -2,26 +2,20 @@
 
 from __future__ import annotations
 
-import argparse
-
 import gradio as gr
 
 from model import AppModel
 
 DESCRIPTION = '''# <a href="https://github.com/THUDM/CogVideo">CogVideo</a>
 
-The model takes only Chinese as input.
-If you check the "Translate to Chinese" checkbox, the app will use the English to Chinese translation results with [this Space](https://huggingface.co/spaces/chinhon/translation_eng2ch) as input.
-But the translation model may mistranslate and the results could be poor.
-So, it is also a good idea to input the translation results from other translation services.
-'''
-
+Currently, this Space only supports the first stage of the CogVideo pipeline due to hardware limitations.
 
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--only-first-stage', action='store_true')
-    parser.add_argument('--share', action='store_true')
-    return parser.parse_args()
+The model accepts only Chinese as input.
+By checking the "Translate to Chinese" checkbox, the results of English to Chinese translation with [this Space](https://huggingface.co/spaces/chinhon/translation_eng2ch) will be used as input.
+Since the translation model may mistranslate, you may want to use the translation results from other translation services.
+'''
+NOTES = 'This app is adapted from https://github.com/hysts/CogVideo_demo. It would be recommended to use the repo if you want to run the app yourself.'
+FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=THUDM.CogVideo" />'
 
 
 def set_example_text(example: list) -> dict:
@@ -29,8 +23,8 @@ def set_example_text(example: list) -> dict:
 
 
 def main():
-    args = parse_args()
-    model = AppModel(args.only_first_stage)
+    only_first_stage = True
+    model = AppModel(only_first_stage)
 
     with gr.Blocks(css='style.css') as demo:
         gr.Markdown(DESCRIPTION)
@@ -48,14 +42,12 @@ def main():
                 label='Seed')
             only_first_stage = gr.Checkbox(
                 label='Only First Stage',
-                value=args.only_first_stage,
-                visible=not args.only_first_stage)
+                value=only_first_stage,
+                visible=not only_first_stage)
             run_button = gr.Button('Run')
 
         with open('samples.txt') as f:
-            samples = [
-                line.strip().split('\t') for line in f.readlines()
-            ]
+            samples = [[line.strip()] for line in f.readlines()]
         examples = gr.Dataset(components=[text], samples=samples)
 
         with gr.Column():
@@ -67,6 +59,9 @@ def main():
             with gr.TabItem('Output (Gallery)'):
                 result_gallery = gr.Gallery(show_label=False)
 
+        gr.Markdown(NOTES)
+        gr.Markdown(FOOTER)
+
         run_button.click(fn=model.run_with_translation,
                          inputs=[
                              text,
@@ -83,10 +78,7 @@ def main():
                        inputs=examples,
                        outputs=examples.components)
 
-    demo.launch(
-        enable_queue=True,
-        share=args.share,
-    )
+    demo.launch(enable_queue=True, share=False)
 
 
 if __name__ == '__main__':
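Note on the app.py change: with argparse removed, first-stage-only mode is hardcoded and samples.txt is read as one prompt per line instead of tab-separated rows, so each line becomes a one-element row matching the single prompt textbox. A minimal standalone sketch of that Dataset wiring under the Gradio 3.1 API pinned by this commit (the layout here is illustrative, not the full app):

```python
import gradio as gr


def set_example_text(example: list) -> dict:
    # A Dataset click passes the selected row; its single column becomes
    # the new value of the prompt textbox.
    return gr.Textbox.update(value=example[0])


with open('samples.txt') as f:
    # One prompt per line -> one-element rows for the single component.
    samples = [[line.strip()] for line in f.readlines()]

with gr.Blocks() as demo:
    text = gr.Textbox(label='Input Text')
    examples = gr.Dataset(components=[text], samples=samples)
    examples.click(fn=set_example_text,
                   inputs=examples,
                   outputs=examples.components)

demo.launch(enable_queue=True, share=False)
```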
model.py CHANGED
@@ -1,16 +1,72 @@
-# This code is adapted from https://github.com/THUDM/CogView2/blob/4e55cce981eb94b9c8c1f19ba9f632fd3ee42ba8/cogview2_text2image.py
+# This code is adapted from https://github.com/THUDM/CogVideo/blob/ff423aa169978fb2f636f761e348631fa3178b03/cogvideo_pipeline.py
 
 from __future__ import annotations
 
 import argparse
-import functools
 import logging
+import os
 import pathlib
+import shutil
+import subprocess
 import sys
 import tempfile
 import time
+import zipfile
 from typing import Any
 
+if os.getenv('SYSTEM') == 'spaces':
+    subprocess.run('pip install icetk==0.0.4'.split())
+    subprocess.run('pip install SwissArmyTransformer==0.2.9'.split())
+    subprocess.run(
+        'pip install git+https://github.com/Sleepychord/Image-Local-Attention@43fee31'
+        .split())
+    #subprocess.run('git clone https://github.com/NVIDIA/apex'.split())
+    #subprocess.run('git checkout 1403c21'.split(), cwd='apex')
+    #with open('patch.apex') as f:
+    #    subprocess.run('patch -p1'.split(), cwd='apex', stdin=f)
+    #subprocess.run(
+    #    'pip install -v --disable-pip-version-check --no-cache-dir --global-option --cpp_ext --global-option --cuda_ext ./'
+    #    .split(),
+    #    cwd='apex')
+    #subprocess.run('rm -rf apex'.split())
+    with open('patch') as f:
+        subprocess.run('patch -p1'.split(), cwd='CogVideo', stdin=f)
+
+    from huggingface_hub import hf_hub_download
+
+    def download_and_extract_icetk_models() -> None:
+        icetk_model_dir = pathlib.Path('/home/user/.icetk_models')
+        icetk_model_dir.mkdir()
+        path = hf_hub_download('THUDM/icetk',
+                               'models.zip',
+                               use_auth_token=os.getenv('HF_TOKEN'))
+        with zipfile.ZipFile(path) as f:
+            f.extractall(path=icetk_model_dir.as_posix())
+
+    def download_and_extract_cogvideo_models(name: str) -> None:
+        path = hf_hub_download('THUDM/CogVideo',
+                               name,
+                               use_auth_token=os.getenv('HF_TOKEN'))
+        with zipfile.ZipFile(path) as f:
+            f.extractall('pretrained')
+        os.remove(path)
+
+    def download_and_extract_cogview2_models(name: str) -> None:
+        path = hf_hub_download('THUDM/CogView2', name)
+        with zipfile.ZipFile(path) as f:
+            f.extractall()
+        shutil.move('/home/user/app/sharefs/cogview-new/cogview2-dsr',
+                    'pretrained')
+        shutil.rmtree('/home/user/app/sharefs/')
+        os.remove(path)
+
+    download_and_extract_icetk_models()
+    download_and_extract_cogvideo_models('cogvideo-stage1.zip')
+    #download_and_extract_cogvideo_models('cogvideo-stage2.zip')
+    #download_and_extract_cogview2_models('cogview2-dsr.zip')
+
+    os.environ['SAT_HOME'] = '/home/user/app/pretrained'
+
 import gradio as gr
 import imageio.v2 as iio
 import numpy as np
@@ -1116,6 +1172,7 @@ class Model:
         start = time.perf_counter()
 
         set_random_seed(seed)
+        self.args.seed = seed
 
         if only_first_stage:
             self.args.stage_1 = True
@@ -1169,7 +1226,7 @@ class AppModel(Model):
 
     def run_with_translation(
         self, text: str, translate: bool, seed: int, only_first_stage: bool
-    ) -> tuple[str | None, np.ndarray | None, list[np.ndarray] | None]:
+    ) -> tuple[str | None, str | None, list[np.ndarray] | None]:
         logger.info(f'{text=}, {translate=}, {seed=}, {only_first_stage=}')
         if translate:
             text = translated_text = self.translator(text)
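Note on the model.py change: when SYSTEM == 'spaces', the module bootstraps itself at import time: it installs pinned wheels with subprocess, patches the vendored CogVideo checkout, downloads gated checkpoints with hf_hub_download, and points SAT_HOME at the extracted weights. A condensed sketch of the download-and-extract pattern (the fetch_and_unzip helper name is illustrative; repo and file names follow the diff):

```python
import os
import pathlib
import zipfile

from huggingface_hub import hf_hub_download


def fetch_and_unzip(repo_id: str, filename: str, target: str) -> None:
    # HF_TOKEN is only needed while the upstream repo is gated.
    path = hf_hub_download(repo_id,
                           filename,
                           use_auth_token=os.getenv('HF_TOKEN'))
    target_dir = pathlib.Path(target)
    target_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(path) as f:
        f.extractall(target_dir.as_posix())
    os.remove(path)  # drop the cached zip once the weights are extracted


fetch_and_unzip('THUDM/CogVideo', 'cogvideo-stage1.zip', 'pretrained')
# SwissArmyTransformer resolves checkpoint paths relative to SAT_HOME.
os.environ['SAT_HOME'] = str(pathlib.Path('pretrained').resolve())
```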
requirements.txt CHANGED
@@ -1,10 +1,7 @@
-git+https://github.com/Sleepychord/Image-Local-Attention@43fee31
-gradio==3.1.0
-icetk==0.0.4
+--extra-index-url https://download.pytorch.org/whl/cu113
 imageio==2.19.5
 imageio-ffmpeg==0.4.7
 numpy==1.22.4
 opencv-python-headless==4.6.0.66
-SwissArmyTransformer==0.2.9
-torch==1.12.0
-torchvision==0.13.0
+torch==1.12.0+cu113
+torchvision==0.13.0+cu113
prompt.txt → samples.txt RENAMED
File without changes