aach456 committed
Commit f40c31f
1 parent: 49131b9

Update app.py

Files changed (1): app.py (+32 -13)
app.py CHANGED
```diff
@@ -1,42 +1,51 @@
 import streamlit as st
 from PIL import Image
 import torch
-import skvideo.io
-from diffusers import I2VGenXLPipeline
-from diffusers.utils import export_to_video, load_image
 import numpy as np
-import imageio
 from moviepy.editor import ImageSequenceClip
 from transformers import MusicgenForConditionalGeneration, AutoProcessor
 from scipy.io import wavfile
 import ffmpeg
 
+# Function to generate video frames
 def generate_video(image, prompt, negative_prompt, video_length):
     generator = torch.manual_seed(8888)
     device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
     print(f"Using device: {device}")
     pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float32)
     pipeline.to(device)
-    frames = pipeline(
-        prompt=prompt,
-        image=image,
-        num_inference_steps=2,
-        negative_prompt=negative_prompt,
-        guidance_scale=9.0,
-        generator=generator,
-        num_frames=video_length*20
-    ).frames[0]
+
+    frames = []
+    total_frames = video_length * 20  # Assuming 20 frames per second
+
+    # Generate frames with progress tracking
+    for i in range(total_frames):
+        frame = pipeline(
+            prompt=prompt,
+            image=image,
+            num_inference_steps=2,
+            negative_prompt=negative_prompt,
+            guidance_scale=9.0,
+            generator=generator,
+            num_frames=1
+        ).frames[0]
+        frames.append(frame)
+        st.progress((i + 1) / total_frames)  # Update progress bar
+
     return frames
 
+# Function to export frames to video
 def export_frames_to_video(frames, output_file):
     frames_np = [np.array(frame) for frame in frames]
     clip = ImageSequenceClip(frames_np, fps=30)
     clip.write_videofile(output_file, codec='libx264', audio=False)
 
+# Function to generate music
 def generate_music(prompt, unconditional=False):
     model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     model.to(device)
+
     if unconditional:
         unconditional_inputs = model.get_unconditional_inputs(num_samples=1)
         audio_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=256)
@@ -48,15 +57,18 @@ def generate_music(prompt, unconditional=False):
             return_tensors="pt",
         )
         audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)
+
     sampling_rate = model.config.audio_encoder.sampling_rate
     return audio_values[0].cpu().numpy(), sampling_rate
 
+# Function to combine audio and video
 def combine_audio_video(audio_file, video_file, output_file):
     audio = ffmpeg.input(audio_file)
     video = ffmpeg.input(video_file)
     output = ffmpeg.output(video, audio, output_file, vcodec='copy', acodec='aac')
     ffmpeg.run(output)
 
+# Streamlit UI
 st.title("AI-Powered Video and Music Generation")
 
 st.sidebar.title("Options")
@@ -74,13 +86,20 @@ unconditional = st.sidebar.checkbox("Generate unconditional music")
 if st.sidebar.button("Generate Video and Music"):
     if image is not None:
         image = Image.open(image)
+
+        # Video generation with progress bar
+        st.write("Generating video...")
         frames = generate_video(image, prompt, negative_prompt, video_length)
         export_frames_to_video(frames, "output_video.mp4")
         st.video("output_video.mp4")
 
+        # Music generation with progress bar
+        st.write("Generating music...")
         audio_values, sampling_rate = generate_music(music_prompt, unconditional)
         wavfile.write("musicgen_out.wav", sampling_rate, audio_values)
         st.audio("musicgen_out.wav")
 
+        # Combine audio and video
+        st.write("Combining audio and video...")
         combine_audio_video("musicgen_out.wav", "output_video.mp4", "combined_output.mp4")
         st.video("combined_output.mp4")
```
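Two details in the new `generate_video` are worth flagging. The hunk above removes `from diffusers import I2VGenXLPipeline`, but the function still references that class, so the file raises a `NameError` as committed. The loop also samples every frame independently with `num_frames=1`, which gives no temporal coherence between frames, and `st.progress(...)` creates a fresh progress widget on each iteration rather than updating one. A minimal sketch of an alternative that restores the import, keeps a single pipeline call, and drives one progress bar from the denoising loop, assuming I2VGenXLPipeline accepts diffusers' `callback_on_step_end` hook:

```python
import streamlit as st
import torch
from diffusers import I2VGenXLPipeline  # still required: generate_video references it

def generate_video(image, prompt, negative_prompt, video_length):
    generator = torch.manual_seed(8888)
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float32)
    pipeline.to(device)

    num_steps = 2
    bar = st.progress(0.0)  # create the widget once, then update it in place

    # Assumption: the pipeline supports diffusers' callback_on_step_end hook,
    # invoked once per denoising step as (pipe, step, timestep, callback_kwargs).
    def on_step_end(pipe, step, timestep, callback_kwargs):
        bar.progress((step + 1) / num_steps)
        return callback_kwargs

    # A single call generates all frames together, keeping them temporally coherent.
    frames = pipeline(
        prompt=prompt,
        image=image,
        num_inference_steps=num_steps,
        negative_prompt=negative_prompt,
        guidance_scale=9.0,
        generator=generator,
        num_frames=video_length * 20,
        callback_on_step_end=on_step_end,
    ).frames[0]
    return frames
```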
 
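A related timing detail: `generate_video` sizes the clip at 20 frames per second, but `export_frames_to_video` writes it out at `fps=30`, so a request for `video_length` seconds yields a clip of `video_length * 20 / 30` seconds. Matching the writer to the generator's assumption fixes the duration:

```python
def export_frames_to_video(frames, output_file):
    frames_np = [np.array(frame) for frame in frames]
    clip = ImageSequenceClip(frames_np, fps=20)  # match the 20 fps assumed in generate_video
    clip.write_videofile(output_file, codec='libx264', audio=False)
```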
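On the music side, MusicGen's `generate` returns a tensor of shape `(batch, channels, samples)`, so `audio_values[0].cpu().numpy()` is still 2-D with the channel axis first, which is not the samples-first layout `scipy.io.wavfile.write` expects. Indexing out batch and channel, as in the Transformers MusicGen examples, gives a 1-D waveform:

```python
sampling_rate = model.config.audio_encoder.sampling_rate
# audio_values has shape (batch, channels, samples); take sample 0, channel 0
waveform = audio_values[0, 0].cpu().numpy()
wavfile.write("musicgen_out.wav", sampling_rate, waveform)
```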
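The music (a fixed 256 new tokens) and the video will rarely have the same duration, and `combine_audio_video` as written keeps the longer stream. If the combined clip should instead end with the shorter stream, ffmpeg's bare `-shortest` output flag can be passed through ffmpeg-python as a `None`-valued keyword; a sketch, assuming that flag-passing convention:

```python
def combine_audio_video(audio_file, video_file, output_file):
    audio = ffmpeg.input(audio_file)
    video = ffmpeg.input(video_file)
    # shortest=None emits the bare -shortest flag; overwrite_output avoids an
    # interactive prompt when combined_output.mp4 already exists.
    output = ffmpeg.output(video, audio, output_file,
                           vcodec='copy', acodec='aac', shortest=None)
    ffmpeg.run(output, overwrite_output=True)
```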
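Finally, both `generate_video` and `generate_music` re-instantiate their models on every button press, which dominates the run time. Streamlit's `st.cache_resource` keeps one instance alive across reruns; a minimal sketch for the MusicGen model (the same pattern applies to the I2VGenXL pipeline):

```python
@st.cache_resource
def load_music_model():
    # Constructed once per server process and reused on every Streamlit rerun.
    return MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
```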