ihsanvp committed
Commit 448a859
1 Parent(s): 95cc45b

fix: local with spaces

Files changed (2)
  1. app.py +26 -29
  2. app_local.py +29 -26
app.py CHANGED
@@ -6,7 +6,7 @@ from torchvision.transforms.functional import to_tensor
 from PIL import Image
 
 if gr.NO_RELOAD:
-    n_steps = 40
+    n_steps = 50
     high_noise_frac = 0.8
     negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
     generator = torch.manual_seed(8888)
@@ -17,39 +17,37 @@ if gr.NO_RELOAD:
         variant="fp16",
         use_safetensors=True,
     )
-    # refiner = DiffusionPipeline.from_pretrained(
-    #     "stabilityai/stable-diffusion-xl-refiner-1.0",
-    #     text_encoder_2=base.text_encoder_2,
-    #     vae=base.vae,
-    #     torch_dtype=torch.float16,
-    #     use_safetensors=True,
-    #     variant="fp16",
-    # )
-    # refiner.to("cuda")
-    # base.to("cuda")
-    # refiner.enable_model_cpu_offload()
-    base.enable_model_cpu_offload()
+    refiner = DiffusionPipeline.from_pretrained(
+        "stabilityai/stable-diffusion-xl-refiner-1.0",
+        text_encoder_2=base.text_encoder_2,
+        vae=base.vae,
+        torch_dtype=torch.float16,
+        use_safetensors=True,
+        variant="fp16",
+    )
     pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
-    pipeline.enable_model_cpu_offload()
-    pipeline.unet.enable_forward_chunking()
+
+    base.to("cuda")
+    refiner.to("cuda")
+    pipeline.to("cuda")
+
+    base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
+    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
+    pipeline.unet = torch.compile(pipeline.unet, mode="reduce-overhead", fullgraph=True)
 
 def generate(prompt: str):
     image = base(
         prompt=prompt,
         num_inference_steps=n_steps,
-        # denoising_end=high_noise_frac,
-        # output_type="latent",
+        denoising_end=high_noise_frac,
+        output_type="latent",
+    ).images[0]
+    image = refiner(
+        prompt=prompt,
+        num_inference_steps=n_steps,
+        denoising_start=high_noise_frac,
+        image=image,
     ).images[0]
-    # image = refiner(
-    #     prompt=prompt,
-    #     num_inference_steps=n_steps,
-    #     denoising_start=high_noise_frac,
-    #     image=image,
-    # ).images[0]
-    # print(image)
-    # print(type(image))
-    # print(image.size())
-    image.save("frame.jpg")
     image = to_tensor(image)
     frames: list[Image.Image] = pipeline(
         prompt=prompt,
@@ -58,11 +56,10 @@ def generate(prompt: str):
         negative_prompt=negative_prompt,
         guidance_scale=9.0,
         generator=generator,
-        decode_chunk_size=6,
     ).frames[0]
     frames = [to_tensor(frame.convert("RGB")).mul(255).byte().permute(1, 2, 0) for frame in frames]
     frames = torch.stack(frames)
-    torchvision.io.write_video("video.mp4", frames, fps=4)
+    torchvision.io.write_video("video.mp4", frames, fps=8)
     return "video.mp4"
 
 app = gr.Interface(
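
For reference, the Spaces build of app.py now runs SDXL as the usual two-stage ensemble: the base pipeline denoises the first high_noise_frac of the schedule and emits latents, and the refiner resumes from that same fraction. Below is a minimal standalone sketch of the handoff, assembled from the hunks above; the base checkpoint id (stabilityai/stable-diffusion-xl-base-1.0) and the prompt are assumptions, since the diff shows only the base pipeline's trailing kwargs.

import torch
from diffusers import DiffusionPipeline

n_steps = 50           # total scheduler steps shared by both stages
high_noise_frac = 0.8  # fraction of the schedule handled by the base model

# NOTE: the base checkpoint id is an assumption; the diff shows only its kwargs.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,  # share the second text encoder ...
    vae=base.vae,                        # ... and the VAE to save VRAM
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda")

prompt = "a placeholder prompt"  # hypothetical input
# The base covers steps [0, 0.8) of the schedule and emits latents, not pixels.
latents = base(
    prompt=prompt,
    num_inference_steps=n_steps,
    denoising_end=high_noise_frac,
    output_type="latent",
).images
# The refiner resumes at the same fraction and decodes the final image.
image = refiner(
    prompt=prompt,
    num_inference_steps=n_steps,
    denoising_start=high_noise_frac,
    image=latents,
).images[0]
image.save("frame.jpg")

The torch.compile(..., mode="reduce-overhead", fullgraph=True) lines added in the same hunk pay a one-time compilation cost per UNet in exchange for faster repeated calls, a reasonable trade for a long-lived Space process.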
app_local.py CHANGED
@@ -6,7 +6,7 @@ from torchvision.transforms.functional import to_tensor
 from PIL import Image
 
 if gr.NO_RELOAD:
-    n_steps = 50
+    n_steps = 40
     high_noise_frac = 0.8
     negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
     generator = torch.manual_seed(8888)
@@ -17,37 +17,39 @@ if gr.NO_RELOAD:
         variant="fp16",
         use_safetensors=True,
     )
-    refiner = DiffusionPipeline.from_pretrained(
-        "stabilityai/stable-diffusion-xl-refiner-1.0",
-        text_encoder_2=base.text_encoder_2,
-        vae=base.vae,
-        torch_dtype=torch.float16,
-        use_safetensors=True,
-        variant="fp16",
-    )
+    # refiner = DiffusionPipeline.from_pretrained(
+    #     "stabilityai/stable-diffusion-xl-refiner-1.0",
+    #     text_encoder_2=base.text_encoder_2,
+    #     vae=base.vae,
+    #     torch_dtype=torch.float16,
+    #     use_safetensors=True,
+    #     variant="fp16",
+    # )
+    # refiner.to("cuda")
+    # base.to("cuda")
+    # refiner.enable_model_cpu_offload()
+    base.enable_model_cpu_offload()
     pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
-
-    base.to("cuda")
-    refiner.to("cuda")
-    pipeline.to("cuda")
-
-    base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
-    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
-    pipeline.unet = torch.compile(pipeline.unet, mode="reduce-overhead", fullgraph=True)
+    pipeline.enable_model_cpu_offload()
+    pipeline.unet.enable_forward_chunking()
 
 def generate(prompt: str):
     image = base(
         prompt=prompt,
         num_inference_steps=n_steps,
-        denoising_end=high_noise_frac,
-        output_type="latent",
-    ).images[0]
-    image = refiner(
-        prompt=prompt,
-        num_inference_steps=n_steps,
-        denoising_start=high_noise_frac,
-        image=image,
+        # denoising_end=high_noise_frac,
+        # output_type="latent",
     ).images[0]
+    # image = refiner(
+    #     prompt=prompt,
+    #     num_inference_steps=n_steps,
+    #     denoising_start=high_noise_frac,
+    #     image=image,
+    # ).images[0]
+    # print(image)
+    # print(type(image))
+    # print(image.size())
+    image.save("frame.jpg")
     image = to_tensor(image)
     frames: list[Image.Image] = pipeline(
         prompt=prompt,
@@ -56,10 +58,11 @@ def generate(prompt: str):
         negative_prompt=negative_prompt,
         guidance_scale=9.0,
         generator=generator,
+        decode_chunk_size=6,
     ).frames[0]
     frames = [to_tensor(frame.convert("RGB")).mul(255).byte().permute(1, 2, 0) for frame in frames]
     frames = torch.stack(frames)
-    torchvision.io.write_video("video.mp4", frames, fps=8)
+    torchvision.io.write_video("video.mp4", frames, fps=4)
     return "video.mp4"
 
 app = gr.Interface(
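
app_local.py moves the opposite way: it drops the refiner stage and trades speed for memory so the script fits a smaller local GPU. A minimal sketch of that memory-frugal I2VGen-XL path, assembled from the hunks above; the image= kwarg and the prompt are assumptions, since the middle arguments of the pipeline call are not visible in the diff.

import torch
import torchvision
from diffusers import I2VGenXLPipeline
from torchvision.transforms.functional import to_tensor
from PIL import Image

pipeline = I2VGenXLPipeline.from_pretrained(
    "ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16"
)
# Keep weights on the CPU and stream each submodule to the GPU only while it runs.
pipeline.enable_model_cpu_offload()
# Split the UNet's feed-forward layers into sequential chunks: slower, lower peak memory.
pipeline.unet.enable_forward_chunking()

image = to_tensor(Image.open("frame.jpg"))  # conditioning frame from the SDXL stage
frames: list[Image.Image] = pipeline(
    prompt="a placeholder prompt",  # hypothetical input
    image=image,                    # assumed kwarg; not visible in the hunks shown
    negative_prompt="Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms",
    guidance_scale=9.0,
    generator=torch.manual_seed(8888),
    decode_chunk_size=6,  # VAE-decode 6 latent frames per pass to cap memory
).frames[0]

# torchvision.io.write_video expects uint8 frames shaped (T, H, W, C).
frames = [to_tensor(f.convert("RGB")).mul(255).byte().permute(1, 2, 0) for f in frames]
torchvision.io.write_video("video.mp4", torch.stack(frames), fps=4)

At I2VGen-XL's default 16 output frames, fps=4 stretches the clip to roughly four seconds, where the Spaces build's fps=8 plays the same frames in two.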