fdsodfs committed
Commit 3b6cf03
1 Parent(s): 526b755
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -2,9 +2,37 @@ import gradio as gr
 import torch
 from torch import autocast
 from diffusers import StableDiffusionPipeline
+import argparse
+from moviepy.editor import AudioFileClip, ImageClip
 
+parser = argparse.ArgumentParser()
+parser.add_argument('--setshare', default=True, action=argparse.BooleanOptionalAction)
 
-def get_stable_diffusion_random_image(prompt):
+def process_inputs(prompt, audio):
+    image = get_stable_diffusion_image(prompt)
+    video = add_static_image_to_audio(image, audio)
+    return video
+
+
+
+def add_static_image_to_audio(image, audio):
+    """Combine the static image `image` with the audio
+    file `audio` and return the resulting video clip
+    (the caller is responsible for writing it out)."""
+    # create the audio clip object
+    audio_clip = AudioFileClip(audio)
+    # create the image clip object
+    image_clip = ImageClip(image)
+    # use set_audio on the image clip to combine the audio with the image
+    video_clip = image_clip.set_audio(audio_clip)
+    # the new clip should last exactly as long as the audio clip
+    video_clip.duration = audio_clip.duration
+    # a static image needs only 1 frame per second
+    video_clip.fps = 1
+    # return the resulting video clip
+    return video_clip
+
+def get_stable_diffusion_image(prompt):
     model_id = "CompVis/stable-diffusion-v1-4"
     device = "cuda"
     pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
@@ -13,7 +41,7 @@ def get_stable_diffusion_random_image(prompt):
     image = pipe(prompt, guidance_scale=7.5)["sample"][0]
     print(image)
     return image
+args = parser.parse_args()
 
-
-iface = gr.Interface(fn=get_stable_diffusion_random_image, inputs=["text", "audio"], outputs="video")
-iface.launch(share=True)
+iface = gr.Interface(fn=process_inputs, inputs=["text", "audio"], outputs="video")
+iface.launch(share=args.setshare)
 
app.py CHANGED
@@ -2,9 +2,37 @@ import gradio as gr
 import torch
 from torch import autocast
 from diffusers import StableDiffusionPipeline
+import argparse
+from moviepy.editor import AudioFileClip, ImageClip
 
+parser = argparse.ArgumentParser()
+parser.add_argument('--setshare', default=True, action=argparse.BooleanOptionalAction)
 
-def get_stable_diffusion_random_image(prompt):
+def process_inputs(prompt, audio):
+    image = get_stable_diffusion_image(prompt)
+    video = add_static_image_to_audio(image, audio)
+    return video
+
+
+
+def add_static_image_to_audio(image, audio):
+    """Combine the static image `image` with the audio
+    file `audio` and return the resulting video clip
+    (the caller is responsible for writing it out)."""
+    # create the audio clip object
+    audio_clip = AudioFileClip(audio)
+    # create the image clip object
+    image_clip = ImageClip(image)
+    # use set_audio on the image clip to combine the audio with the image
+    video_clip = image_clip.set_audio(audio_clip)
+    # the new clip should last exactly as long as the audio clip
+    video_clip.duration = audio_clip.duration
+    # a static image needs only 1 frame per second
+    video_clip.fps = 1
+    # return the resulting video clip
+    return video_clip
+
+def get_stable_diffusion_image(prompt):
     model_id = "CompVis/stable-diffusion-v1-4"
     device = "cuda"
     pipe = StableDiffusionPipeline.from_pretrained(model_id, use_auth_token=True)
@@ -13,7 +41,7 @@ def get_stable_diffusion_random_image(prompt):
     image = pipe(prompt, guidance_scale=7.5)["sample"][0]
     print(image)
     return image
+args = parser.parse_args()
 
-
-iface = gr.Interface(fn=get_stable_diffusion_random_image, inputs=["text", "audio"], outputs="video")
-iface.launch(share=True)
+iface = gr.Interface(fn=process_inputs, inputs=["text", "audio"], outputs="video")
+iface.launch(share=args.setshare)
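With argparse.BooleanOptionalAction (available since Python 3.9), the --setshare flag automatically gets a --no-setshare counterpart, so the app can be launched locally with "python app.py --no-setshare" to skip the public Gradio share link; the default stays share=True.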
 
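One loose end in the diff: process_inputs returns a moviepy clip object, but a gr.Interface with outputs="video" expects the callback to return a path to a rendered video file. A minimal sketch of the missing rendering step, reusing the functions from the diff above (the output.mp4 path is illustrative, not from the commit):

def process_inputs(prompt, audio):
    image = get_stable_diffusion_image(prompt)
    video = add_static_image_to_audio(image, audio)
    # render the clip to disk; moviepy delegates the actual
    # encoding to an ffmpeg binary (see packages.txt)
    video.write_videofile("output.mp4")  # illustrative output path
    return "output.mp4"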
packages.txt ADDED
@@ -0,0 +1 @@
+ffmpeg
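moviepy relies on an ffmpeg binary for reading and writing audio and video, so the commit adds ffmpeg here as a system-level package for the Space.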
requirements.txt CHANGED
@@ -3,3 +3,5 @@ transformers
 nvidia-ml-py3
 ftfy
 --extra-index-url https://download.pytorch.org/whl/cu113 torch
+argparse
+moviepy
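Note that argparse has been part of the Python standard library since 3.2; the argparse package on PyPI is a legacy backport, so only moviepy actually needs to be installed here.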