zR committed on
Commit
f0ae709
1 Parent(s): c1dc654
Files changed (2) hide show
  1. README.md +7 -10
  2. README_zh.md +6 -9
README.md CHANGED
@@ -127,14 +127,14 @@ pip install --upgrade transformers accelerate diffusers imageio-ffmpeg
127
 
128
  2. Run the code
129
 
130
- ```python
131
  import torch
132
- from diffusers import CogVideoXImageToVideoPipeline
133
- from diffusers.utils import export_to_video, load_image
134
 
135
- prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
136
- image = load_image(image="input.jpg")
137
- pipe = CogVideoXImageToVideoPipeline.from_pretrained(
138
  "THUDM/CogVideoX1.5-5B",
139
  torch_dtype=torch.bfloat16
140
  )
@@ -145,7 +145,6 @@ pipe.vae.enable_slicing()
145
 
146
  video = pipe(
147
  prompt=prompt,
148
- image=image,
149
  num_videos_per_prompt=1,
150
  num_inference_steps=50,
151
  num_frames=81,
@@ -169,7 +168,7 @@ with `torch.compile`, which can significantly accelerate inference.
169
 
170
  import torch
171
  from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXImageToVideoPipeline
172
- from diffusers.utils import export_to_video, load_image
173
  from transformers import T5EncoderModel
174
  from torchao.quantization import quantize_, int8_weight_only
175
 
@@ -200,10 +199,8 @@ pipe.vae.enable_tiling()
200
  pipe.vae.enable_slicing()
201
 
202
  prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
203
- image = load_image(image="input.jpg")
204
  video = pipe(
205
  prompt=prompt,
206
- image=image,
207
  num_videos_per_prompt=1,
208
  num_inference_steps=50,
209
  num_frames=81,
 
127
 
128
  2. Run the code
129
 
130
+ ```python
131
  import torch
132
+ from diffusers import CogVideoXPipeline
133
+ from diffusers.utils import export_to_video
134
 
135
+ prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance."
136
+
137
+ pipe = CogVideoXPipeline.from_pretrained(
138
  "THUDM/CogVideoX1.5-5B",
139
  torch_dtype=torch.bfloat16
140
  )
 
145
 
146
  video = pipe(
147
  prompt=prompt,
 
148
  num_videos_per_prompt=1,
149
  num_inference_steps=50,
150
  num_frames=81,
 
168
 
169
  import torch
170
  from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXImageToVideoPipeline
171
+ from diffusers.utils import export_to_video
172
  from transformers import T5EncoderModel
173
  from torchao.quantization import quantize_, int8_weight_only
174
 
 
199
  pipe.vae.enable_slicing()
200
 
201
  prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
 
202
  video = pipe(
203
  prompt=prompt,
 
204
  num_videos_per_prompt=1,
205
  num_inference_steps=50,
206
  num_frames=81,
README_zh.md CHANGED
@@ -109,12 +109,12 @@ pip install --upgrade transformers accelerate diffusers imageio-ffmpeg
109
 
110
  ```python
111
  import torch
112
- from diffusers import CogVideoXImageToVideoPipeline
113
- from diffusers.utils import export_to_video, load_image
114
 
115
- prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
116
- image = load_image(image="input.jpg")
117
- pipe = CogVideoXImageToVideoPipeline.from_pretrained(
118
  "THUDM/CogVideoX1.5-5B",
119
  torch_dtype=torch.bfloat16
120
  )
@@ -125,7 +125,6 @@ pipe.vae.enable_slicing()
125
 
126
  video = pipe(
127
  prompt=prompt,
128
- image=image,
129
  num_videos_per_prompt=1,
130
  num_inference_steps=50,
131
  num_frames=81,
@@ -148,7 +147,7 @@ GPU 上运行该模型成为可能!值得注意的是,TorchAO 量化与 `tor
148
 
149
  import torch
150
  from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXImageToVideoPipeline
151
- from diffusers.utils import export_to_video, load_image
152
  from transformers import T5EncoderModel
153
  from torchao.quantization import quantize_, int8_weight_only
154
 
@@ -177,10 +176,8 @@ pipe.vae.enable_tiling()
177
  pipe.vae.enable_slicing()
178
 
179
  prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
180
- image = load_image(image="input.jpg")
181
  video = pipe(
182
  prompt=prompt,
183
- image=image,
184
  num_videos_per_prompt=1,
185
  num_inference_steps=50,
186
  num_frames=81,
 
109
 
110
  ```python
111
  import torch
112
+ from diffusers import CogVideoXPipeline
113
+ from diffusers.utils import export_to_video
114
 
115
+ prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance."
116
+
117
+ pipe = CogVideoXPipeline.from_pretrained(
118
  "THUDM/CogVideoX1.5-5B",
119
  torch_dtype=torch.bfloat16
120
  )
 
125
 
126
  video = pipe(
127
  prompt=prompt,
 
128
  num_videos_per_prompt=1,
129
  num_inference_steps=50,
130
  num_frames=81,
 
147
 
148
  import torch
149
  from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXImageToVideoPipeline
150
+ from diffusers.utils import export_to_video
151
  from transformers import T5EncoderModel
152
  from torchao.quantization import quantize_, int8_weight_only
153
 
 
176
  pipe.vae.enable_slicing()
177
 
178
  prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
 
179
  video = pipe(
180
  prompt=prompt,
 
181
  num_videos_per_prompt=1,
182
  num_inference_steps=50,
183
  num_frames=81,