Commit 5228705
1 parent: d0466ed
Committed by hysts (HF staff)

Update to use diffusers

Files changed (3)
  1. README.md +1 -1
  2. app.py +55 -32
  3. requirements.txt +8 -6
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
  colorFrom: pink
  colorTo: pink
  sdk: gradio
- sdk_version: 3.22.1
+ sdk_version: 3.23.0
  app_file: app.py
  pinned: false
  ---
app.py CHANGED
@@ -3,33 +3,14 @@
  from __future__ import annotations

  import os
- import pathlib
  import random
- import shlex
- import subprocess
+ import tempfile

  import gradio as gr
+ import imageio
+ import numpy as np
  import torch
- from huggingface_hub import snapshot_download
-
- if os.getenv('SYSTEM') == 'spaces':
-     subprocess.run(shlex.split('pip uninstall -y modelscope'))
-     subprocess.run(
-         shlex.split('git clone https://github.com/modelscope/modelscope'),
-         cwd='/tmp',
-         env={'GIT_LFS_SKIP_SMUDGE': '1'})
-     subprocess.run(shlex.split('git checkout fe67395'), cwd='/tmp/modelscope')
-     subprocess.run(shlex.split('pip install .'), cwd='/tmp/modelscope')
-
- from modelscope.outputs import OutputKeys
- from modelscope.pipelines import pipeline
-
- model_dir = pathlib.Path('weights')
- if not model_dir.exists():
-     model_dir.mkdir()
-     snapshot_download('damo-vilab/modelscope-damo-text-to-video-synthesis',
-                       repo_type='model',
-                       local_dir=model_dir)
+ from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

  DESCRIPTION = '# [ModelScope Text to Video Synthesis](https://modelscope.cn/models/damo/text-to-video-synthesis/summary)'
  DESCRIPTION += '\n<p>For Colab usage, you can view <a href="https://colab.research.google.com/drive/1uW1ZqswkQ9Z9bp5Nbo5z59cAn7I0hE6R?usp=sharing" style="text-decoration: underline;" target="_blank">this webpage</a>.(the latest update on 2023.03.21)</p>'
@@ -37,20 +18,43 @@ DESCRIPTION += '\n<p>This model can only be used for non-commercial purposes. To
  if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
      DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'

- pipe = pipeline('text-to-video-synthesis', model_dir.as_posix())
+ MAX_NUM_FRAMES = int(os.getenv('MAX_NUM_FRAMES', '200'))
+ DEFAULT_NUM_FRAMES = min(MAX_NUM_FRAMES,
+                          int(os.getenv('DEFAULT_NUM_FRAMES', '16')))

+ pipe = DiffusionPipeline.from_pretrained('damo-vilab/text-to-video-ms-1.7b',
+                                          torch_dtype=torch.float16,
+                                          variant='fp16')
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+ pipe.enable_model_cpu_offload()
+ pipe.enable_vae_slicing()

+
+ def to_video(frames: list[np.ndarray], fps: int) -> str:
+     out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+     writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=fps)
+     for frame in frames:
+         writer.append_data(frame)
+     writer.close()
+     return out_file.name
+
+
- def generate(prompt: str, seed: int) -> str:
+ def generate(prompt: str, seed: int, num_frames: int,
+              num_inference_steps: int) -> str:
      if seed == -1:
          seed = random.randint(0, 1000000)
-     torch.manual_seed(seed)
-     return pipe({'text': prompt})[OutputKeys.OUTPUT_VIDEO]
+     generator = torch.Generator().manual_seed(seed)
+     frames = pipe(prompt,
+                   num_inference_steps=num_inference_steps,
+                   num_frames=num_frames,
+                   generator=generator).frames
+     return to_video(frames, 8)


  examples = [
-     ['An astronaut riding a horse.', 0],
-     ['A panda eating bamboo on a rock.', 0],
-     ['Spiderman is surfing.', 0],
+     ['An astronaut riding a horse.', 0, 16, 25],
+     ['A panda eating bamboo on a rock.', 0, 16, 25],
+     ['Spiderman is surfing.', 0, 16, 25],
  ]

  with gr.Blocks(css='style.css') as demo:
@@ -75,8 +79,27 @@ with gr.Blocks(css='style.css') as demo:
              step=1,
              value=-1,
              info='If set to -1, a different seed will be used each time.')
-
-     inputs = [prompt, seed]
+         num_frames = gr.Slider(
+             label='Number of frames',
+             minimum=16,
+             maximum=MAX_NUM_FRAMES,
+             step=1,
+             value=16,
+             info=
+             'Note that the content of the video also changes when you change the number of frames.'
+         )
+         num_inference_steps = gr.Slider(label='Number of inference steps',
+                                         minimum=10,
+                                         maximum=50,
+                                         step=1,
+                                         value=25)
+
+     inputs = [
+         prompt,
+         seed,
+         num_frames,
+         num_inference_steps,
+     ]
      gr.Examples(examples=examples,
                  inputs=inputs,
                  outputs=result,
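
For reference, here is a minimal standalone sketch of the new diffusers-based generation path introduced by this commit, outside the Gradio UI. It reuses the model id, scheduler, and call pattern from app.py above; the example prompt and the 8 fps output rate simply mirror the demo's defaults, and the temporary output path is illustrative.

import tempfile

import imageio
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

# Load the text-to-video pipeline the same way app.py now does.
pipe = DiffusionPipeline.from_pretrained('damo-vilab/text-to-video-ms-1.7b',
                                         torch_dtype=torch.float16,
                                         variant='fp16')
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()  # offloads submodules to CPU; needs accelerate
pipe.enable_vae_slicing()

# Generate 16 frames with a fixed seed, then write them to an MP4 at 8 fps.
generator = torch.Generator().manual_seed(0)
frames = pipe('An astronaut riding a horse.',
              num_inference_steps=25,
              num_frames=16,
              generator=generator).frames

out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=8)
for frame in frames:
    writer.append_data(frame)
writer.close()
print(out_file.name)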
requirements.txt CHANGED
@@ -1,6 +1,8 @@
- decord==0.6.0
- fairseq==0.12.2
- gradio==3.22.1
- huggingface-hub==0.13.2
- modelscope[multi-modal]==1.4.1
- open_clip_torch==2.16.0
+ accelerate==0.17.1
+ git+https://github.com/huggingface/diffusers@9dc8444
+ gradio==3.23.0
+ huggingface-hub==0.13.3
+ imageio[ffmpeg]==2.26.1
+ torch==2.0.0
+ torchvision==0.15.1
+ transformers==4.27.2
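
As a small optional check (not part of the commit), the installed stack can be compared against the new pins after installation; since diffusers is installed from a specific git commit (9dc8444), it will typically report a development version rather than a tagged release.

# Illustrative sanity check that installed versions match the new requirements.
import accelerate
import diffusers
import gradio
import imageio
import torch
import torchvision
import transformers

for name, module in [('accelerate', accelerate), ('diffusers', diffusers),
                     ('gradio', gradio), ('imageio', imageio),
                     ('torch', torch), ('torchvision', torchvision),
                     ('transformers', transformers)]:
    print(f'{name}=={module.__version__}')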