# NOTE: "Spaces: Running" was status-page residue from the hosting
# Hugging Face Space listing, not part of the program.
import os | |
import json | |
import urllib.request | |
from PIL import Image | |
from gtts import gTTS | |
import cv2 | |
import moviepy.editor as mp | |
import logging | |
from openai import AzureOpenAI | |
import uuid | |
import time | |
import gradio as gr | |
# --- Logging configuration -------------------------------------------------
# Log file location is controlled by LOG_DIRECTORY (defaults to the CWD).
log_dir = os.getenv('LOG_DIRECTORY', './')
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
logging.basicConfig(
    filename=LOGGER_FILE_PATH,
    filemode='a',  # append so restarts do not clobber earlier logs
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)
LOGGER = logging.getLogger(__name__)

# Map the LOG_LEVEL env var to a logging constant; unknown values fall
# back to INFO via dict.get instead of an explicit membership test.
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL
}
log_level = log_level_dict.get(log_level_env, logging.INFO)
LOGGER.setLevel(log_level)
class Text2Video:
    """Generate a narrated comic-style video from text prompts.

    Pipeline: each prompt becomes an image (Azure OpenAI DALL-E 3) and a
    narration track (gTTS); the pairs are stitched into one MP4 with
    moviepy. A Gradio UI exposes the whole workflow.
    """

    def __init__(self) -> None:
        """Initialize the Azure OpenAI client.

        Credentials/endpoint are read from the environment by the SDK
        (AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, etc.).
        """
        self.client = AzureOpenAI()

    def get_image(self, img_prompt: str) -> str:
        """
        Generate an image based on the provided text prompt.

        Args:
            img_prompt (str): Text prompt for generating the image.

        Returns:
            str: URL of the generated image, or "" on failure.
        """
        try:
            # Generate the image via the Azure OpenAI DALL-E 3 deployment.
            result = self.client.images.generate(
                model="Dalle3",
                prompt=f"text: ``` {img_prompt}```. add this backticked text in the image as comics.",
                n=1,
                quality="standard",
                size="1792x1024",
                style="vivid"
            )
            # The SDK returns typed objects; read the URL directly rather
            # than round-tripping through model_dump_json()/json.loads().
            return result.data[0].url
        except Exception as e:
            # Log and degrade gracefully; callers treat "" as failure.
            LOGGER.error(f"Error generating image: {e}")
            return ""

    def download_img_from_url(self, image_url: str, image_path: str) -> str:
        """
        Download an image from a URL.

        Args:
            image_url (str): URL of the image to download.
            image_path (str): Path to save the downloaded image.

        Returns:
            str: Path of the downloaded image, or "" on failure.
        """
        try:
            urllib.request.urlretrieve(image_url, image_path)
            return image_path
        except Exception as e:
            LOGGER.error(f"Error downloading image from URL: {e}")
            return ""

    def text_to_audio(self, img_prompt: str, audio_path: str) -> str:
        """
        Convert text to speech and save it as an MP3 file.

        Args:
            img_prompt (str): Text to convert to speech.
            audio_path (str): Path to save the audio file.

        Returns:
            str: Path of the saved audio file, or "" on failure.
        """
        try:
            language = 'en'
            # gTTS performs the synthesis via Google's TTS endpoint.
            myobj = gTTS(text=img_prompt, lang=language, slow=False)
            myobj.save(audio_path)
            return audio_path
        except Exception as e:
            LOGGER.error(f"Error converting text to audio: {e}")
            return ""

    def get_images_and_audio(self, list_prompts: list) -> tuple:
        """
        Generate images and corresponding audio files from a list of prompts.

        Args:
            list_prompts (list): List of text prompts.

        Returns:
            tuple: (image paths, audio paths) — always equal length; prompts
            whose image or audio generation failed are skipped entirely.
        """
        img_list = []
        audio_paths = []
        for img_prompt in list_prompts:
            try:
                # Unique id keeps concurrent/repeated runs from colliding
                # on the filesystem.
                unique_id = uuid.uuid4().hex
                image_path = f"{img_prompt[:9]}_{unique_id}.png"
                img_url = self.get_image(img_prompt)
                image = self.download_img_from_url(img_url, image_path)

                audio_path = f"{img_prompt[:9]}_{unique_id}.mp3"
                audio = self.text_to_audio(img_prompt, audio_path)

                # Keep the two lists in lockstep: only record the pair when
                # BOTH assets exist, otherwise video assembly would choke on
                # an empty path.
                if image and audio:
                    img_list.append(image)
                    audio_paths.append(audio)
                else:
                    LOGGER.error(f"Skipping prompt, asset generation failed: {img_prompt}")
            except Exception as e:
                LOGGER.error(f"Error processing prompt: {img_prompt}, {e}")
        return img_list, audio_paths

    def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
        """
        Create a video from images and corresponding audio files.

        Each image is shown for the duration of its paired narration clip;
        the clips are concatenated and encoded with libx264 at 24 fps.

        Args:
            image_files (list): List of image file paths.
            audio_files (list): List of audio file paths (parallel to images).
            output_path (str): Path to save the output video file.
        """
        try:
            # The two lists must be parallel; bail out early otherwise.
            if len(image_files) != len(audio_files):
                LOGGER.error("Error: Number of images doesn't match the number of audio files.")
                return

            video_clips = []
            for image_file, audio_file in zip(image_files, audio_files):
                audio_clip = mp.AudioFileClip(audio_file)
                # Still image held for exactly the narration's duration.
                video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
                video_clip = video_clip.set_audio(audio_clip)
                video_clips.append(video_clip)

            final_clip = mp.concatenate_videoclips(video_clips)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)
            print("Video created successfully.")
        except Exception as e:
            LOGGER.error(f"Error creating video: {e}")

    def generate_video(self, text: str) -> str:
        """
        Generate a video from double-comma-separated text prompts.

        Args:
            text (str): Raw input; individual prompts are separated by ",,".

        Returns:
            str: Path of the generated video, or None if generation failed.
        """
        try:
            # Split on ",," and drop empty/whitespace-only segments.
            list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
            output_path = "output_video1.mp4"
            img_list, audio_paths = self.get_images_and_audio(list_prompts)
            self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
            return output_path
        except Exception as e:
            LOGGER.error(f"Error generating video: {e}")

    def gradio_interface(self):
        """Build and launch the Gradio UI for the comic-video generator."""
        with gr.Blocks(css ="style.css", theme='abidlabs/dracula_revamped') as demo:
            example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.
            'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
            That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""

            gr.HTML("""
            <center><h1 style="color:#fff">Comics Video Generator</h1></center>""")

            with gr.Row(elem_id = "col-container"):
                input_text = gr.Textbox(label="Comics Text",placeholder="Enter the comics by double comma separated")

            with gr.Row(elem_id = "col-container"):
                button = gr.Button("Generate Video")

            with gr.Row(elem_id = "col-container"):
                output = gr.PlayableVideo()

            with gr.Row(elem_id = "col-container"):
                example = gr.Examples([example_txt], input_text)

            button.click(self.generate_video,[input_text],output)
        demo.launch(debug =True)
if __name__ == "__main__":
    # Build the pipeline object and serve the Gradio UI.
    app = Text2Video()
    app.gradio_interface()