# jsonop / app.py — Hugging Face Space source (commit 07d1639, "Update app.py" by sheikhed)
# (web-view metadata preserved from scrape: raw / history / blame; 9.65 kB)
import os
import requests
import json
import time
import subprocess
import gradio as gr
import uuid
from dotenv import load_dotenv
# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()
# API keys — all read from the environment; any of these may be None if unset.
A_KEY = os.getenv("A_KEY")  # ElevenLabs API key (used by voice listing and TTS)
B_KEY = os.getenv("B_KEY")  # Lipsync service API key (used by job submit/poll)
OPENAI_KEY = os.getenv("OPENAI_KEY")  # OpenAI API key (used by OpenAI TTS)
# Service endpoints.
API_URL = os.getenv("API_URL")  # Lipsync job endpoint (POST to create, GET /{id} to poll)
UPLOAD_URL = os.getenv("UPLOAD_URL")  # File-hosting endpoint; expects a 'fileupload' form — presumably catbox-style, TODO confirm
OPENAI_API_URL = "https://api.openai.com/v1/audio/speech"
def get_elevenlabs_voices():
    """Fetch the available ElevenLabs voices.

    Returns:
        A list of (display_name, voice_id) tuples, or an empty list on any
        network failure or non-200 response so the UI can still be built
        without ElevenLabs connectivity.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {
        "Accept": "application/json",
        "xi-api-key": A_KEY
    }
    try:
        # Timeout prevents the app from hanging at startup if the API is unreachable.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
def get_openai_voices():
    """Return the fixed set of OpenAI TTS voices as (label, value) pairs."""
    # OpenAI's TTS voices are a fixed set, so no API call is needed; the
    # label and the API value are identical for each voice.
    names = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    return [(name, name) for name in names]
def text_to_speech_elevenlabs(voice_id, text, session_id):
    """Synthesize speech with ElevenLabs and save it to a temp MP3.

    Args:
        voice_id: ElevenLabs voice identifier.
        text: Text to synthesize.
        session_id: Unique id used to name the temp file (avoids collisions
            between concurrent requests).

    Returns:
        Path to the written MP3 file, or None on any failure.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": A_KEY
    }
    data = {
        "text": text,
        "model_id": "eleven_turbo_v2_5",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }
    try:
        # Generous timeout: synthesis of long text can take a while, but we
        # must never hang the Gradio worker indefinitely.
        response = requests.post(url, json=data, headers=headers, timeout=120)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    audio_file_path = f'temp_voice_{session_id}.mp3'
    with open(audio_file_path, 'wb') as audio_file:
        audio_file.write(response.content)
    return audio_file_path
def text_to_speech_openai(voice, text, session_id):
    """Synthesize speech with OpenAI's TTS API and save it to a temp MP3.

    Args:
        voice: One of the predefined OpenAI voice names (see get_openai_voices).
        text: Text to synthesize.
        session_id: Unique id used to name the temp file.

    Returns:
        Path to the written MP3 file, or None on any failure.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "tts-1",
        "input": text,
        "voice": voice
    }
    try:
        # Timeout keeps a stalled API call from hanging the request handler.
        response = requests.post(OPENAI_API_URL, headers=headers, json=data, timeout=120)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    audio_file_path = f'temp_voice_{session_id}.mp3'
    with open(audio_file_path, 'wb') as audio_file:
        audio_file.write(response.content)
    return audio_file_path
def upload_file(file_path):
    """Upload a local file to the configured hosting endpoint.

    Sends a multipart form with reqtype=fileupload (the format UPLOAD_URL
    expects) and returns the hosted URL from the response body.

    Returns:
        The uploaded file's URL string, or None on any failure.
    """
    with open(file_path, 'rb') as file:
        files = {'fileToUpload': (os.path.basename(file_path), file)}
        data = {'reqtype': 'fileupload'}
        try:
            # Uploads of multi-MB audio can be slow; still bound the wait.
            response = requests.post(UPLOAD_URL, files=files, data=data, timeout=120)
        except requests.RequestException:
            return None
    if response.status_code == 200:
        return response.text.strip()
    return None
def lipsync_api_call(video_url, audio_url):
    """Submit a lipsync job for the given hosted video and audio URLs.

    Returns:
        The parsed JSON response from the lipsync API. Callers check it for
        an "error"/"message" key (failure) or an "id" key (job submitted).
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": B_KEY
    }
    data = {
        "audioUrl": audio_url,
        "videoUrl": video_url,
        "maxCredits": 1000,
        "model": "sync-1.7.1-beta",
        "synergize": True,
        "pads": [0, 5, 0, 0],
        "synergizerStrength": 1
    }
    # json= serializes the payload and is the idiomatic replacement for
    # data=json.dumps(data); timeout bounds the wait on job submission.
    response = requests.post(API_URL, headers=headers, json=data, timeout=60)
    return response.json()
def check_job_status(job_id):
    """Poll the lipsync API until the job completes, fails, or times out.

    Polls every 10 seconds for up to 30 attempts (~5 minutes).

    Returns:
        The completed video's URL, or None on failure or timeout.
    """
    headers = {"x-api-key": B_KEY}
    max_attempts = 30
    for _ in range(max_attempts):
        try:
            response = requests.get(f"{API_URL}/{job_id}", headers=headers, timeout=30)
            data = response.json()
        except (requests.RequestException, ValueError):
            # Transient network/JSON error — keep polling instead of crashing.
            time.sleep(10)
            continue
        # .get avoids a KeyError aborting the whole poll loop on a
        # malformed/partial response.
        status = data.get("status")
        if status == "COMPLETED":
            return data.get("videoUrl")
        elif status == "FAILED":
            return None
        time.sleep(10)
    return None
def get_media_duration(file_path):
    """Return the duration of a media file in seconds, via ffprobe.

    Raises:
        RuntimeError: if ffprobe exits nonzero (previously this surfaced as
            an opaque ValueError from float('') on empty stdout).
        ValueError: if ffprobe succeeds but prints a non-numeric duration.
    """
    cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
           '-of', 'default=noprint_wrappers=1:nokey=1', file_path]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed for {file_path}: {result.stderr.strip()}")
    return float(result.stdout.strip())
def combine_audio_video(video_path, audio_path, output_path):
    """Mux the audio track onto the video with ffmpeg.

    The output always matches the audio's duration: a longer video is
    trimmed, a shorter one is looped enough times to cover the audio.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits nonzero.
    """
    vid_len = get_media_duration(video_path)
    aud_len = get_media_duration(audio_path)
    if vid_len > aud_len:
        # Video outlasts the audio: cut it down to the audio's length.
        cmd = [
            'ffmpeg', '-i', video_path, '-i', audio_path,
            '-t', str(aud_len),
            '-map', '0:v', '-map', '1:a',
            '-c:v', 'copy', '-c:a', 'aac',
            '-y', output_path,
        ]
    else:
        # Audio outlasts the video: loop the video enough times to cover it,
        # then trim to the audio's length.
        repeats = int(aud_len // vid_len) + 1
        cmd = [
            'ffmpeg', '-stream_loop', str(repeats), '-i', video_path, '-i', audio_path,
            '-t', str(aud_len),
            '-map', '0:v', '-map', '1:a',
            '-c:v', 'copy', '-c:a', 'aac',
            '-shortest', '-y', output_path,
        ]
    subprocess.run(cmd, check=True)
def process_video(provider, voice, video_url, text, progress=gr.Progress()):
    """Generate speech, lipsync it onto the video, and return the result.

    Pipeline: TTS -> upload audio -> submit lipsync job -> poll -> download.
    If any lipsync step fails, falls back to a plain ffmpeg audio/video mux
    of the original video.

    Args:
        provider: "ElevenLabs" or anything else (treated as OpenAI).
        voice: Provider-specific voice id/name.
        video_url: URL of the source video to lipsync.
        text: Text to speak.
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        (output_video_path, status_message); output path is None on total failure.
    """
    # Unique id keeps temp/output filenames from colliding across sessions.
    session_id = str(uuid.uuid4())
    progress(0, desc="Generating speech...")
    if provider == "ElevenLabs":
        audio_path = text_to_speech_elevenlabs(voice, text, session_id)
    else:  # OpenAI
        audio_path = text_to_speech_openai(voice, text, session_id)
    if not audio_path:
        # Early return happens BEFORE the try/finally, so cleanup below never
        # sees a None audio_path.
        return None, "Failed to generate speech audio."
    progress(0.2, desc="Processing video...")
    try:
        progress(0.3, desc="Uploading audio...")
        audio_url = upload_file(audio_path)
        if not audio_url:
            raise Exception("Failed to upload audio file")
        progress(0.4, desc="Initiating lipsync...")
        job_data = lipsync_api_call(video_url, audio_url)
        # NOTE(review): presence of "message" is treated as an error; assumes
        # successful submissions never include a "message" key — TODO confirm.
        if "error" in job_data or "message" in job_data:
            raise Exception(job_data.get("error", job_data.get("message", "Unknown error")))
        job_id = job_data["id"]
        progress(0.5, desc="Processing lipsync...")
        result_url = check_job_status(job_id)
        if result_url:
            progress(0.9, desc="Downloading result...")
            response = requests.get(result_url)
            output_path = f"output_{session_id}.mp4"
            with open(output_path, "wb") as f:
                f.write(response.content)
            progress(1.0, desc="Complete!")
            return output_path, "Lipsync completed successfully!"
        else:
            raise Exception("Lipsync processing failed or timed out")
    except Exception as e:
        # Fallback: skip lipsync entirely and just mux the generated audio
        # onto the downloaded original video.
        progress(0.8, desc="Falling back to simple combination...")
        try:
            video_response = requests.get(video_url)
            video_path = f"temp_video_{session_id}.mp4"
            with open(video_path, "wb") as f:
                f.write(video_response.content)
            output_path = f"output_{session_id}.mp4"
            combine_audio_video(video_path, audio_path, output_path)
            progress(1.0, desc="Complete!")
            return output_path, f"Used fallback method. Original error: {str(e)}"
        except Exception as fallback_error:
            return None, f"All methods failed. Error: {str(fallback_error)}"
    finally:
        # Clean up temp files regardless of which path (or failure) we took;
        # the final output_*.mp4 is intentionally left for Gradio to serve.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        if os.path.exists(f"temp_video_{session_id}.mp4"):
            os.remove(f"temp_video_{session_id}.mp4")
def create_interface():
    """Build and return the Gradio Blocks UI for the TTS + lipsync app.

    Voice lists are fetched once at build time; the voice dropdown is
    repopulated whenever the provider dropdown changes.
    """
    elevenlabs_voices = get_elevenlabs_voices()
    openai_voices = get_openai_voices()
    with gr.Blocks() as app:
        gr.Markdown("# Voice Synthesis Application")
        with gr.Row():
            with gr.Column():
                provider_dropdown = gr.Dropdown(choices=["ElevenLabs", "OpenAI"], label="Select Provider", value="ElevenLabs")
                # Dropdown shows display names only; ids are resolved in on_generate.
                voice_dropdown = gr.Dropdown(choices=[v[0] for v in elevenlabs_voices], label="Select Voice", value=elevenlabs_voices[0][0] if elevenlabs_voices else None)
                video_url_input = gr.Textbox(label="Enter Video URL")
                text_input = gr.Textbox(label="Enter text", lines=3)
                generate_btn = gr.Button("Generate Video")
            with gr.Column():
                video_output = gr.Video(label="Generated Video")
                status_output = gr.Textbox(label="Status", interactive=False)

        def update_voices(provider):
            # Swap the voice choices to match the selected provider.
            if provider == "ElevenLabs":
                return gr.Dropdown(choices=[v[0] for v in elevenlabs_voices], value=elevenlabs_voices[0][0] if elevenlabs_voices else None)
            else:  # OpenAI
                return gr.Dropdown(choices=[v[0] for v in openai_voices], value=openai_voices[0][0])

        provider_dropdown.change(fn=update_voices, inputs=[provider_dropdown], outputs=[voice_dropdown])

        def on_generate(provider, voice_name, video_url, text):
            # Map the displayed voice name back to the provider's voice id,
            # then run the full pipeline.
            try:
                if provider == "ElevenLabs":
                    voices = elevenlabs_voices
                else:  # OpenAI
                    voices = openai_voices
                voice_id = next((v[1] for v in voices if v[0] == voice_name), None)
                if not voice_id:
                    raise ValueError(f"Invalid voice selected for {provider}: {voice_name}")
                return process_video(provider, voice_id, video_url, text)
            except Exception as e:
                return None, f"Error: {str(e)}"

        generate_btn.click(
            fn=on_generate,
            inputs=[provider_dropdown, voice_dropdown, video_url_input, text_input],
            outputs=[video_output, status_output]
        )
    return app
# Entry point: build the UI and start the Gradio server when run as a script.
if __name__ == "__main__":
    app = create_interface()
    app.launch()