import gradio as gr
from typing import Dict

from moviepy.editor import VideoFileClip, concatenate_videoclips


class VideoCreator:
    def __init__(self, tts_pipeline, image_pipeline) -> None:
        self.tts_pipeline = tts_pipeline
        self.image_pipeline = image_pipeline

    def create_video(self, scenes: Dict, video_styles: str) -> str:
        # Render each scene to its own clip, then merge the clips in order.
        videos_dict = {}
        for index, scene in enumerate(scenes):
            video_scene = self._create_video_from_scene(scenes[scene], video_styles)
            videos_dict[index] = video_scene
        merged_video = self._merge_videos(videos_dict)
        return merged_video

    def _create_video_from_scene(self, scene: Dict, video_styles: str) -> str:
        # Narrate the scene summary, generate a background image from the
        # illustration prompt, and combine both into a waveform video.
        audio_file = self._get_audio_from_text(scene["Summary"])
        bg_image = self._get_bg_image_from_description(scene["Illustration"], video_styles)
        video = gr.make_waveform(audio=audio_file, bg_image=bg_image)
        return video

    def _get_audio_from_text(self, voice_over: str) -> str:
        self.tts_pipeline.tts_to_file(text=voice_over, file_path="output.wav")
        return "output.wav"

    def _get_bg_image_from_description(self, img_desc: str, video_styles: str) -> str:
        images = self.image_pipeline(img_desc + ", " + video_styles)
        print("Image generated!")
        image_output = images.images[0]
        image_output.save("img.png")
        return "img.png"

    def _merge_videos(self, videos_dict: Dict) -> str:
        videos_to_concatenate = []
        for video in range(len(videos_dict)):
            video_clip = VideoFileClip(videos_dict[video])
            videos_to_concatenate.append(video_clip)
        final_video = concatenate_videoclips(videos_to_concatenate)
        try:
            final_video.write_videofile("final_video.mp4", threads=4)
            print("Saved .mp4 without Exception at final_video.mp4")
            return "final_video.mp4"
        except IndexError:
            # Short by one frame, so get rid of the last frame:
            final_video = final_video.subclip(t_end=(final_video.duration - 1.0 / final_video.fps))
            final_video.write_videofile("final_video.mp4", threads=4)
            print("Saved .mp4 after Exception at final_video.mp4")
            return "final_video.mp4"
        except Exception as e:
            print("Exception {} was raised!!".format(e))
            return "final_video.mp4"
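

# Usage sketch (illustrative assumptions, not part of the class above): the
# constructor only requires a TTS object exposing `tts_to_file(text=..., file_path=...)`
# and an image pipeline that returns an object with an `.images` list when called
# with a prompt. Coqui TTS and a diffusers Stable Diffusion pipeline are assumed
# here as one possible pairing; the scene keys "Summary" and "Illustration" match
# what `_create_video_from_scene` expects.
if __name__ == "__main__":
    from TTS.api import TTS
    from diffusers import StableDiffusionPipeline

    tts_pipeline = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
    image_pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

    creator = VideoCreator(tts_pipeline, image_pipeline)
    scenes = {
        "scene_1": {"Summary": "A narrator introduces the story.",
                    "Illustration": "a quiet village at dawn"},
        "scene_2": {"Summary": "The hero sets off on a journey.",
                    "Illustration": "a winding mountain road"},
    }
    print(creator.create_video(scenes, video_styles="watercolor, soft lighting"))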