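# Text-to-short-video generator (Gradio app).
# Pipeline: a Lemonfox chat model writes a short script with background image
# descriptions, the tonyassi/voice-clone Space narrates it with the chosen voice,
# the Lemonfox image API renders the backgrounds, and OpenCV + ffmpeg assemble
# the final vertical video with word-by-word captions.
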
import json
import math
import os
import shutil
import subprocess
import time

import cv2
import numpy as np
import requests
from dotenv import load_dotenv
from pydub import AudioSegment

import gradio as gr
from gradio_client import Client, file


# Function to get a friendly name from an audio file name
def get_friendly_name(filename):
    return os.path.splitext(filename)[0].capitalize()


# Get audio files and their friendly names
audio_files_dir = "audio_folder"  # Path to your audio folder
audio_files = [
    (get_friendly_name(f), f)
    for f in os.listdir(audio_files_dir)
    if f.endswith(".mp3") or f.endswith(".wav")
]

# Load environment variables
load_dotenv(override=True)
LEMONFOX_API_KEY = os.getenv("LEMONFOX_API_KEY")

def parse(narration):
    data = []
    narrations = []
    lines = narration.split("\n")
    for line in lines:
        if line.startswith('Narrator: '):
            text = line.replace('Narrator: ', '')
            data.append({
                "type": "text",
                "content": text.strip('"'),
            })
            narrations.append(text.strip('"'))
        elif line.startswith('['):
            background = line.strip('[]')
            data.append({
                "type": "image",
                "description": background,
            })
    return data, narrations

# Generate one narration clip per text element using the voice-clone Space
def create(data, output_folder, audio_file):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Initialize Gradio Client
    client = Client("tonyassi/voice-clone")
    audio_files_dir = "audio_folder"  # Path to your audio folder

    for element in data:
        if element["type"] != "text":
            continue

        # Make prediction using the provided API
        audio_file_path = os.path.join(audio_files_dir, audio_file)
        result = client.predict(
            text=element["content"],
            audio=file(audio_file_path)  # Include reference style audio for API
        )

        # Move the response audio file to the output folder
        response_file_path = os.path.join(output_folder, f"narration_{len(os.listdir(output_folder)) + 1}.wav")
        shutil.move(result, response_file_path)
        print(f"Audio file generated for '{element['content']}' saved at: {response_file_path}")

# Request a background image from the Lemonfox image API and save it to disk
def generate(prompt, output_file, size="576x1024"):
    url = 'https://api.lemonfox.ai/v1/images/generations'
    headers = {
        'Authorization': f'Bearer {LEMONFOX_API_KEY}',  # Bearer auth, matching the chat completions call below
        'Content-Type': 'application/json'
    }
    data = {
        'prompt': prompt,
        'size': size,
        'n': 1
    }

    try:
        response = requests.post(url, json=data, headers=headers)
        if response.ok:
            response_data = response.json()
            if 'data' in response_data and len(response_data['data']) > 0:
                image_info = response_data['data'][0]
                image_url = image_info['url']
                image_response = requests.get(image_url)
                with open(output_file, 'wb') as f:
                    f.write(image_response.content)
            else:
                print(f"No image data found for prompt: {prompt}")
        else:
            print(f"Failed to generate image for prompt: {prompt}. Status Code: {response.status_code}")
    except Exception as e:
        print(f"Error occurred while processing prompt: {prompt}")
        print(str(e))

def create_from_data(data, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_number = 0
    for element in data:
        if element["type"] != "image":
            continue
        image_number += 1
        image_name = f"image_{image_number}.webp"
        generate(element["description"], os.path.join(output_dir, image_name))


def get_audio_duration(audio_file):
    # pydub reports the segment length in milliseconds
    return len(AudioSegment.from_file(audio_file))

def resize_image(image, width, height):
    aspect_ratio = image.shape[1] / image.shape[0]
    if aspect_ratio > (width / height):
        new_width = width
        new_height = int(width / aspect_ratio)
    else:
        new_height = height
        new_width = int(height * aspect_ratio)
    return cv2.resize(image, (new_width, new_height))


def write_text(text, frame, video_writer):
    font = cv2.FONT_HERSHEY_SIMPLEX
    white_color = (255, 255, 255)
    black_color = (0, 0, 0)
    thickness = 10
    font_scale = 3
    border = 5

    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (frame.shape[1] - text_size[0]) // 2
    text_y = (frame.shape[0] + text_size[1]) // 2
    org = (text_x, text_y)

    frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
    frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)
    video_writer.write(frame)

def add_narration_to_video(narrations, input_video, output_dir, output_file, text_color, text_position):
    offset = 50
    cap = cv2.VideoCapture(input_video)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    temp_video = os.path.join(output_dir, "with_transcript.avi")
    out = cv2.VideoWriter(temp_video, fourcc, 60, (int(cap.get(3)), int(cap.get(4))))

    full_narration = AudioSegment.empty()
    for i, narration in enumerate(narrations):
        audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.wav")
        duration = get_audio_duration(audio)
        narration_frames = math.floor(duration / 2000 * 60)
        full_narration += AudioSegment.from_file(audio)

        char_count = len(narration.replace(" ", ""))
        ms_per_char = duration / char_count
        frames_written = 0

        words = narration.split(" ")
        for w, word in enumerate(words):
            word_ms = len(word) * ms_per_char
            if i == 0 and w == 0:
                word_ms -= offset
            if word_ms < 0:
                word_ms = 0
            for _ in range(math.floor(word_ms / 2000 * 60)):
                ret, frame = cap.read()
                if not ret:
                    break
                write_text(word, frame, out)
                frames_written += 1

        # Pad with plain frames so each narration segment keeps its full length
        for _ in range(narration_frames - frames_written):
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)

    # Copy any remaining frames unchanged
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        out.write(frame)

    temp_narration = os.path.join(output_dir, "narration.wav")
    full_narration.export(temp_narration, format="wav")

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    # Mux the stitched narration audio onto the captioned video
    ffmpeg_command = [
        'ffmpeg',
        '-y',
        '-i', temp_video,
        '-i', temp_narration,
        '-map', '0:v',
        '-map', '1:a',
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        os.path.join(output_dir, output_file)
    ]
    subprocess.run(ffmpeg_command, capture_output=True)

    os.remove(temp_video)
    os.remove(temp_narration)

def create_video(narrations, output_dir, output_file, text_color, text_position):  # Add text_color and text_position parameters here
    width, height = 1080, 1920
    frame_rate = 60
    fade_time = 2000

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    temp_video = os.path.join(output_dir, "temp_video.avi")
    out = cv2.VideoWriter(temp_video, fourcc, frame_rate, (width, height))

    image_paths = os.listdir(os.path.join(output_dir, "images"))
    image_count = len(image_paths)

    for i in range(image_count):
        image1 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+1}.webp"))
        if i + 1 < image_count:
            image2 = cv2.imread(os.path.join(output_dir, "images", f"image_{i+2}.webp"))
        else:
            image2 = cv2.imread(os.path.join(output_dir, "images", "image_1.webp"))

        image1 = resize_image(image1, width, height)
        image2 = resize_image(image2, width, height)

        narration = os.path.join(output_dir, "narrations", f"narration_{i+1}.wav")
        duration = get_audio_duration(narration)

        if i > 0:
            duration -= fade_time
        if i == image_count - 1:
            duration -= fade_time

        # Hold the current image for the narration's duration
        for _ in range(math.floor(duration / 2000 * 60)):
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = image1
            out.write(vertical_video_frame)

        # Cross-fade into the next image
        for alpha in np.linspace(0, 1, math.floor(fade_time / 1000 * 30)):
            blended_image = cv2.addWeighted(image1, 1 - alpha, image2, alpha, 0)
            vertical_video_frame = np.zeros((height, width, 3), dtype=np.uint8)
            vertical_video_frame[:image1.shape[0], :] = blended_image
            out.write(vertical_video_frame)

    out.release()
    cv2.destroyAllWindows()

    add_narration_to_video(narrations, temp_video, output_dir, output_file, text_color, text_position)  # Pass text_color and text_position here
    os.remove(temp_video)

def generate_video(topic, voice_choice):
    short_id = str(int(time.time()))
    basedir = os.path.join("shorts", short_id)
    if not os.path.exists(basedir):
        os.makedirs(basedir)

    filename = topic.replace("_", " ").replace("/", "_").replace(".", "_")
    output_file = f"{filename}.avi"

    # Extract the voice file based on voice_choice
    voice_file = [fname for name, fname in audio_files if name == voice_choice][0]

    chat_url = 'https://api.lemonfox.ai/v1/chat/completions'
    headers = {
        'Authorization': f'Bearer {LEMONFOX_API_KEY}',
        'Content-Type': 'application/json'
    }
    payload = {
        "model": "mixtral-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a YouTube short video creator."
            },
            {
                "role": "user",
                "content": f"""make a short video on: \n\n{topic} Generate 60 seconds to 1 minute of video. You will need to generate a very short description of images for each of the sentences. They will be used for background images. Note that the script will be fed into a text-to-speech engine, so don't use special characters. Respond with a pair of an image description in square brackets and a script below it. Both of them should be on their own lines, as follows: ###
[Description of a background image]
Narrator: "One sentence of narration"
### The short should be 6 sentences maximum."""
            }
        ]
    }

    response = requests.post(chat_url, json=payload, headers=headers)
    if response.status_code == 200:
        response_text = response.json()['choices'][0]['message']['content']
        # Normalize smart quotes and ellipses from the model to plain ASCII for the TTS engine
        response_text = response_text.replace("’", "'").replace("`", "'").replace("…", "...").replace("“", '"').replace("”", '"')

        with open(os.path.join(basedir, "response.txt"), "a") as f:
            f.write(response_text + "\n")

        data, narrations = parse(response_text)
        with open(os.path.join(basedir, "data.json"), "a") as f:
            json.dump(data, f, ensure_ascii=False)
            f.write("\n")

        print(f"Generating narration for: {topic}...")
        create(data, os.path.join(basedir, "narrations"), voice_file)

        print("Generating images...")
        create_from_data(data, os.path.join(basedir, "images"))

        print("Generating video...")
        create_video(narrations, basedir, output_file, text_color="white", text_position="center")  # Pass text_color and text_position here

        print("Deleting files and folders...")
        os.remove(os.path.join(basedir, "response.txt"))
        os.remove(os.path.join(basedir, "data.json"))
        shutil.rmtree(os.path.join(basedir, "narrations"))
        shutil.rmtree(os.path.join(basedir, "images"))

        print(f"DONE! Here's your video: {os.path.join(basedir, output_file)}")
        return os.path.join(basedir, output_file)
    else:
        print(f"Failed to generate script for source material: {topic}. Status Code: {response.status_code}")
        return None

iface = gr.Interface(
    fn=generate_video,
    inputs=["text", gr.Dropdown(choices=[name for name, _ in audio_files], label="Select Voice")],
    outputs="video",
    css="footer {visibility: hidden}",
    description="Generate a free short video. Best for YouTube Shorts, Instagram Reels or TikTok. This is a prototype. If you want better software, please email me at aheedsajid@gmail.com. If you like this project, [click here to donate](https://nowpayments.io/donation/aheed).",
    title="Text to Short Video Free"
)

iface.launch()