|
|
|
|
|
import io
import os
import tempfile

import cv2
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
|
|
|
def text_to_wav(text, file_name):
    """Synthesize English speech for *text* with gTTS and write it to *file_name*.

    gTTS produces MP3 data. To make the file content match its name, the MP3
    is decoded with pydub and re-exported as WAV. If *file_name* explicitly
    ends in ``.mp3`` the MP3 data is saved unchanged instead.

    Args:
        text: The text to speak.
        file_name: Destination path of the audio file.
    """
    language = 'en'
    tts = gTTS(text=text, lang=language, slow=False)

    # Honor an explicit MP3 target: no transcoding needed.
    if str(file_name).lower().endswith(".mp3"):
        tts.save(file_name)
        return

    # Keep the intermediate MP3 in memory so no stray temp file is left behind.
    mp3_buffer = io.BytesIO()
    tts.write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)

    speech = AudioSegment.from_file(mp3_buffer, format="mp3")
    # export() hands back the output file object; close it so the handle
    # is not leaked.
    speech.export(file_name, format="wav").close()
|
|
|
def generate_video(input_image, input_text):
    """Render a still-image MP4 whose length is derived from spoken *input_text*.

    The text is synthesized to speech only to measure how long it lasts; the
    image is then written repeatedly as video frames for three times that
    (rounded) duration at 60 fps.

    NOTE: cv2.VideoWriter writes frames only, so the returned video has no
    audio track.

    Args:
        input_image: Either a path to an image file, or an image array in RGB
            channel order (which is what Gradio's default "image" input is
            documented to pass -- TODO confirm for the installed version).
        input_text: The text whose spoken duration sizes the video.

    Returns:
        Path of the generated ``.mp4`` file, inside a fresh temporary
        directory.

    Raises:
        ValueError: If the text is empty or the image is missing/unreadable.
        RuntimeError: If OpenCV cannot open the output video for writing.
    """
    if not input_text or not input_text.strip():
        raise ValueError("input_text must not be empty")

    frame = _load_bgr_frame(input_image)
    height, width = frame.shape[:2]

    # A per-call directory keeps concurrent requests from overwriting each
    # other's files and guarantees the output location actually exists.
    work_dir = tempfile.mkdtemp(prefix="generate_video_")
    text_file = os.path.join(work_dir, "input_audio.wav")
    video_file = os.path.join(work_dir, "generated_video.mp4")

    try:
        text_to_wav(input_text, text_file)
        audio = AudioSegment.from_file(text_file)
        duration_seconds = len(audio) / 1000.0  # len() of a segment is in ms
    finally:
        # The audio is only needed for its duration.
        if os.path.exists(text_file):
            os.remove(text_file)

    fps = 60
    # NOTE(review): the video is deliberately kept at 3x the rounded speech
    # length, as in the original code -- confirm this multiplier is intended.
    video_duration_seconds = round(duration_seconds) * 3
    # Very short speech rounds to 0 s; still emit one frame so the file is a
    # playable video rather than an empty container.
    frame_count = max(1, int(fps * video_duration_seconds))

    video = cv2.VideoWriter(
        video_file,
        cv2.VideoWriter_fourcc(*'mp4v'),
        fps,
        (width, height),
    )
    if not video.isOpened():
        video.release()
        raise RuntimeError(f"could not open video for writing: {video_file}")

    try:
        for _ in range(frame_count):
            video.write(frame)
    finally:
        video.release()

    return video_file


def _load_bgr_frame(image):
    """Return *image* (file path or RGB array) as a 3-channel BGR frame."""
    if image is None:
        raise ValueError("input_image is required")

    if isinstance(image, (str, os.PathLike)):
        frame = cv2.imread(os.fspath(image))
        # cv2.imread signals failure by returning None instead of raising.
        if frame is None:
            raise ValueError(f"could not read image: {image!r}")
        return frame

    # Array input: assumed to be RGB(A)/grayscale as delivered by the UI
    # layer, while OpenCV's writer expects BGR.
    if image.ndim == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
|
|
if __name__ == "__main__":
    # Build the web UI: one image input and one text input feed
    # generate_video, whose result is shown as a video.
    demo = gr.Interface(fn=generate_video, inputs=["image", "text"], outputs="video")

    # share=True is passed through to Gradio's launch() (it requests a
    # shareable link in addition to the local server).
    demo.launch(share=True)
|
|