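# Text-to-Short-Video Gradio app: turns a topic into a narrated vertical video by asking the
# Lemonfox chat API for a script, generating background images, cloning a reference voice for
# the narration, and stitching everything together with OpenCV and ffmpeg.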
import json
import os
import shutil
import subprocess
import sys
import time
import math

import cv2
import requests
from pydub import AudioSegment
import numpy as np
from dotenv import load_dotenv
import gradio as gr
from gradio_client import Client, file

# Function to get a friendly name from an audio file name
def get_friendly_name(filename):
    return os.path.splitext(filename)[0].capitalize()

# Get audio files and their friendly names
audio_files_dir = "audio_folder"  # Path to your audio folder
audio_files = [(get_friendly_name(f), f) for f in os.listdir(audio_files_dir) if f.endswith(".mp3") or f.endswith(".wav")]

# Load environment variables
load_dotenv(override=True)
LEMONFOX_API_KEY = os.getenv("LEMONFOX_API_KEY")
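
# parse() expects the chat model's reply to alternate lines of the form
#   [Description of a background image]
#   Narrator: "One sentence of narration"
# and splits them into background image prompts and narration sentences.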
def parse(narration):
    data = []
    narrations = []
    lines = narration.split("\n")
    for line in lines:
        if line.startswith('Narrator: '):
            text = line.replace('Narrator: ', '')
            data.append({
                "type": "text",
                "content": text.strip('"'),
            })
            narrations.append(text.strip('"'))
        elif line.startswith('['):
            background = line.strip('[]')
            data.append({
                "type": "image",
                "description": background,
            })
    return data, narrations
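
# create() sends each narration sentence to the "tonyassi/voice-clone" Gradio Space, using the
# selected reference audio as the voice to clone, and saves the results as narration_1.wav,
# narration_2.wav, ... in output_folder.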
def create(data, output_folder, audio_file):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Initialize Gradio Client
    client = Client("tonyassi/voice-clone")
    audio_files_dir = "audio_folder"  # Path to your audio folder

    for element in data:
        if element["type"] != "text":
            continue

        # Make prediction using the provided API
        audio_file_path = os.path.join(audio_files_dir, audio_file)
        result = client.predict(
            text=element["content"],
            audio=file(audio_file_path)  # Reference style audio for the voice-clone Space
        )

        # Move the response audio file to the output folder
        response_file_path = os.path.join(output_folder, f"narration_{len(os.listdir(output_folder)) + 1}.wav")
        shutil.move(result, response_file_path)
        print(f"Audio file generated for '{element['content']}' saved at: {response_file_path}")
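
# generate() requests one image per prompt from the Lemonfox image endpoint (576x1024 by default,
# a 9:16 portrait that fits the 1080x1920 video frame) and downloads it to output_file.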
def generate(prompt, output_file, size="576x1024"):
    url = 'https://api.lemonfox.ai/v1/images/generations'
    headers = {
        'Authorization': f'Bearer {LEMONFOX_API_KEY}',  # Bearer prefix, matching the chat completions call below
        'Content-Type': 'application/json'
    }
    data = {
        'prompt': prompt,
        'size': size,
        'n': 1
    }

    try:
        response = requests.post(url, json=data, headers=headers)
        if response.ok:
            response_data = response.json()
            if 'data' in response_data and len(response_data['data']) > 0:
                image_info = response_data['data'][0]
                image_url = image_info['url']
                image_response = requests.get(image_url)
                with open(output_file, 'wb') as f:
                    f.write(image_response.content)
            else:
                print(f"No image data found for prompt: {prompt}")
        else:
            print(f"Failed to generate image for prompt: {prompt}. Status Code: {response.status_code}")
    except Exception as e:
        print(f"Error occurred while processing prompt: {prompt}")
        print(str(e))

def create_from_data(data, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_number = 0
    for element in data:
        if element["type"] != "image":
            continue
        image_number += 1
        image_name = f"image_{image_number}.webp"
        generate(element["description"], os.path.join(output_dir, image_name))

def get_audio_duration(audio_file):
    # pydub reports the segment length in milliseconds
    return len(AudioSegment.from_file(audio_file))

# Resize while preserving aspect ratio so the image fits inside width x height
def resize_image(image, width, height):
    aspect_ratio = image.shape[1] / image.shape[0]
    if aspect_ratio > (width / height):
        new_width = width
        new_height = int(width / aspect_ratio)
    else:
        new_height = height
        new_width = int(height * aspect_ratio)
    return cv2.resize(image, (new_width, new_height))
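
# write_text() draws one caption word per frame: a thick black outline first, then white text on
# top, centered on the frame, and writes the frame to the video writer.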
def write_text(text, frame, video_writer):
    font = cv2.FONT_HERSHEY_SIMPLEX
    white_color = (255, 255, 255)
    black_color = (0, 0, 0)
    thickness = 10
    font_scale = 3
    border = 5

    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (frame.shape[1] - text_size[0]) // 2
    text_y = (frame.shape[0] + text_size[1]) // 2
    org = (text_x, text_y)

    frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
    frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)
    video_writer.write(frame)
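
# add_narration_to_video() overlays word-by-word captions on the slideshow. Word timing is
# approximated from character counts (ms_per_char), the per-sentence WAVs are concatenated into
# one narration track, and ffmpeg muxes that track onto the subtitled video.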
def add_narration_to_video(narrations, input_video, output_dir, output_file, text_color, text_position):
    # text_color and text_position are accepted for future styling; write_text currently hard-codes its look
    offset = 50
    cap = cv2.VideoCapture(input_video)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    temp_video = os.path.join(output_dir, "with_transcript.avi")
    out = cv2.VideoWriter(temp_video, fourcc, 60, (int(cap.get(3)), int(cap.get(4))))

    full_narration = AudioSegment.empty()
    for i, narration in enumerate(narrations):
        audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.wav")
        duration = get_audio_duration(audio)
        # Convert the narration length (ms) into a frame budget at the writer's 60 fps
        narration_frames = math.floor(duration / 1000 * 60)
        full_narration += AudioSegment.from_file(audio)

        char_count = len(narration.replace(" ", ""))
        ms_per_char = duration / char_count
        frames_written = 0
        words = narration.split(" ")
        for w, word in enumerate(words):
            # Estimate how long the word is spoken from its character count
            word_ms = len(word) * ms_per_char
            if i == 0 and w == 0:
                word_ms -= offset
            if word_ms < 0:
                word_ms = 0
            for _ in range(math.floor(word_ms / 1000 * 60)):
                ret, frame = cap.read()
                if not ret:
                    break
                write_text(word, frame, out)
                frames_written += 1

        # Pad with caption-free frames until this narration's frame budget is used up
        for _ in range(narration_frames - frames_written):
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)

    # Copy any remaining frames of the source video unchanged
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)

    temp_narration = os.path.join(output_dir, "narration.wav")
    full_narration.export(temp_narration, format="wav")

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    # Mux the concatenated narration audio onto the captioned video
    ffmpeg_command = [
        'ffmpeg',
        '-y',
        '-i', temp_video,
        '-i', temp_narration,
        '-map', '0:v',
        '-map', '1:a',
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        os.path.join(output_dir, output_file)
    ]
    subprocess.run(ffmpeg_command, capture_output=True)

    os.remove(temp_video)
    os.remove(temp_narration)
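
# create_video() builds a 1080x1920 slideshow: each generated image is shown for roughly the
# length of its narration clip, with a 2-second crossfade into the next image, then the silent
# video is handed to add_narration_to_video() for captions and audio.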
def create_video(narrations, output_dir, output_file, text_color, text_position):
    width, height = 1080, 1920
    frame_rate = 60
    fade_time = 2000  # crossfade length in milliseconds

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    temp_video = os.path.join(output_dir, "temp_video.avi")
    out = cv2.VideoWriter(temp_video, fourcc, frame_rate, (width, height))

    image_paths = os.listdir(os.path.join(output_dir, "images"))
    image_count = len(image_paths)

    for i in range(image_count):
        image1 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+1}.webp"))
        if i + 1 < image_count:
            image2 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+2}.webp"))
        else:
            # Wrap around to the first image for the final crossfade
            image2 = cv2.imread(os.path.join(output_dir, "images", "image_1.webp"))

        image1 = resize_image(image1, width, height)
        image2 = resize_image(image2, width, height)

        narration = os.path.join(output_dir, "narrations", f"narration_{i+1}.wav")
        duration = get_audio_duration(narration)
        if i > 0:
            duration -= fade_time
        if i == image_count - 1:
            duration -= fade_time

        # Hold the current image for the narration's duration (ms converted to frames)
        for _ in range(math.floor(duration / 1000 * frame_rate)):
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = image1
            out.write(vertical_video_frame)

        # Crossfade into the next image
        for alpha in np.linspace(0, 1, math.floor(fade_time / 1000 * frame_rate)):
            blended_image = cv2.addWeighted(image1, 1 - alpha, image2, alpha, 0)
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = blended_image
            out.write(vertical_video_frame)

    out.release()
    cv2.destroyAllWindows()

    add_narration_to_video(narrations, temp_video, output_dir, output_file, text_color, text_position)
    os.remove(temp_video)
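
# generate_video() is the end-to-end pipeline: ask the Lemonfox chat API for a short script,
# parse it, synthesize the narration in the chosen voice, generate background images, assemble
# the video, and clean up the intermediate files.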
def generate_video(topic, voice_choice):
    short_id = str(int(time.time()))
    basedir = os.path.join("shorts", short_id)
    if not os.path.exists(basedir):
        os.makedirs(basedir)

    filename = topic.replace("_", " ").replace("/", "_").replace(".", "_")
    output_file = f"{filename}.avi"

    # Extract the voice file based on voice_choice
    voice_file = [fname for name, fname in audio_files if name == voice_choice][0]

    chat_url = 'https://api.lemonfox.ai/v1/chat/completions'
    headers = {
        'Authorization': f'Bearer {LEMONFOX_API_KEY}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": "mixtral-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a YouTube short video creator."
            },
            {
                "role": "user",
                "content": f"""make a short video on: \n\n{topic} Generate 60 seconds to 1 minute of video. You will need to generate a very short description of images for each of the sentences. They will be used for background images. Note that the script will be fed into a text-to-speech engine, so don't use special characters. Respond with a pair of an image description in square brackets and a script below it. Both of them should be on their own lines, as follows: ###
[Description of a background image]
Narrator: "One sentence of narration"
### The short should be 6 sentences maximum."""
            }
        ]
    }

    response = requests.post(chat_url, json=payload, headers=headers)
    if response.status_code == 200:
        response_text = response.json()['choices'][0]['message']['content']
        # Normalize curly quotes, backticks and ellipses so the narration stays plain ASCII
        response_text = response_text.replace("’", "'").replace("`", "'").replace("…", "...").replace("“", '"').replace("”", '"')

        with open(os.path.join(basedir, "response.txt"), "a") as f:
            f.write(response_text + "\n")

        data, narrations = parse(response_text)
        with open(os.path.join(basedir, "data.json"), "a") as f:
            json.dump(data, f, ensure_ascii=False)
            f.write("\n")

        print(f"Generating narration for: {topic}...")
        create(data, os.path.join(basedir, "narrations"), voice_file)

        print("Generating images...")
        create_from_data(data, os.path.join(basedir, "images"))

        print("Generating video...")
        create_video(narrations, basedir, output_file, text_color="white", text_position="center")

        print("Deleting files and folders...")
        os.remove(os.path.join(basedir, "response.txt"))
        os.remove(os.path.join(basedir, "data.json"))
        shutil.rmtree(os.path.join(basedir, "narrations"))
        shutil.rmtree(os.path.join(basedir, "images"))

        print(f"DONE! Here's your video: {os.path.join(basedir, output_file)}")
        return os.path.join(basedir, output_file)
    else:
        print(f"Failed to generate script for source material: {topic}. Status Code: {response.status_code}")
        return None
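
# Gradio front end: a topic textbox plus a voice dropdown built from audio_folder; the app
# returns the rendered video file.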
iface = gr.Interface(
    fn=generate_video,
    inputs=["text", gr.Dropdown(choices=[name for name, _ in audio_files], label="Select Voice")],
    outputs="video",
    css="footer {visibility: hidden}",
    description="Generate a free short video. Best for YouTube Shorts, Instagram Reels or TikTok. This is a prototype. If you want better software, please inbox or email me at aheedsajid@gmail.com and do like and [Click here to Donate](https://nowpayments.io/donation/aheed)",
    title="Text to Short Video Free"
)

iface.launch()