Spaces:

Salman11223
/

Video_translation

Runtime error

App Files Files Community

Video_translation / translate.py

Salman11223

Update translate.py

63ba6cc verified 5 months ago

raw

history blame

4.45 kB

	from TTS.api import TTS

	# Confirm licensing or agreement to terms.
	# This prompt runs at import time. On a host without an interactive stdin
	# input() raises EOFError, so fall back to the same default that an empty
	# answer already produces instead of crashing the import.
	# NOTE(review): Coqui's model downloader appears to have its own terms
	# prompt (reportedly skipped via the COQUI_TOS_AGREED environment
	# variable) - confirm how the deployment is expected to accept the terms.
	try:
	    confirm_license = input("Have you purchased a commercial license from Coqui or agree to the terms of the non-commercial CPML? (y/n): ") or "yes"
	except EOFError:
	    confirm_license = "yes"

	# The answer used to be ignored; an explicit refusal now stops the module
	# before the model is loaded.
	if confirm_license.strip().lower() in ("n", "no"):
	    raise SystemExit("The XTTS model terms were not accepted; aborting.")

	# Initialize TTS
	# Pick the device at runtime so the module also loads on CPU-only hosts;
	# moving the model with .to(device) replaces the `gpu=True` argument.
	import torch

	_tts_device = "cuda" if torch.cuda.is_available() else "cpu"
	tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(_tts_device)


	import subprocess
	import assemblyai as aai
	import requests
	import moviepy.editor as mp

	class translation:
	    """Translate the speech of a video and lip-sync the video to the new audio.

	    The pipeline, driven by ``translate_video``, is: extract the audio track
	    (moviepy), transcribe it (AssemblyAI), translate the transcript
	    (Microsoft Translator REST API), synthesize the translated speech with
	    the module-level ``tts`` model using the extracted audio as the speaker
	    reference, and finally run the Wav2Lip inference script.

	    Args:
	        video_path: Path of the input video file.
	        original_language: Display name of the spoken language
	            (a key of ``LANGUAGE_CODES``).
	        target_language: Display name of the language to translate into
	            (a key of ``LANGUAGE_CODES``).
	    """

	    # Display name -> language code passed to the transcription,
	    # translation and TTS calls.
	    LANGUAGE_CODES = {
	        "English": "en",
	        "German": "de",
	        "Italian": "it",
	        "Spanish": "es",
	    }

	    # Intermediate and output locations (same values the code always used).
	    EXTRACTED_AUDIO_PATH = "output_audio.wav"
	    SYNTH_AUDIO_PATH = "output_synth.wav"
	    WAV2LIP_DIR = "/content/Wav2Lip"
	    OUTPUT_VIDEO_PATH = "/content/output_video.mp4"

	    # Seconds to wait for the translation HTTP request before giving up.
	    TRANSLATE_TIMEOUT = 30

	    def __init__(self, video_path, original_language, target_language):
	        self.video_path = video_path
	        self.original_language = original_language
	        self.target_language = target_language

	    def org_language_parameters(self, original_language):
	        """Set ``self.lan_code`` for *original_language* ('' if unsupported)."""
	        self.lan_code = self.LANGUAGE_CODES.get(original_language, '')

	    def target_language_parameters(self, target_language):
	        """Set ``self.tran_code`` for *target_language* ('' if unsupported)."""
	        self.tran_code = self.LANGUAGE_CODES.get(target_language, '')

	    def extract_audio(self):
	        """Write the video's audio track to ``EXTRACTED_AUDIO_PATH``.

	        Returns:
	            The path of the written WAV file.

	        Raises:
	            ValueError: If the video has no audio track.
	        """
	        video = mp.VideoFileClip(self.video_path)
	        try:
	            audio = video.audio
	            if audio is None:
	                raise ValueError(f"No audio track found in {self.video_path!r}")
	            audio.write_audiofile(self.EXTRACTED_AUDIO_PATH)
	        finally:
	            # Release the reader held by the clip.
	            video.close()
	        print("Audio extracted successfully!")
	        return self.EXTRACTED_AUDIO_PATH

	    def transcribe_audio(self, audio_path):
	        """Transcribe *audio_path* with AssemblyAI in language ``self.lan_code``.

	        The API key is read from the ``ASSEMBLYAI_API_KEY`` environment
	        variable when set.

	        Returns:
	            The transcript text, or ``None`` (after printing the service's
	            error) when the transcript status is an error.
	        """
	        import os

	        # NOTE(review): this fallback key is committed in source; rotate it
	        # and supply the key through the environment instead.
	        aai.settings.api_key = os.environ.get(
	            "ASSEMBLYAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2"
	        )
	        config = aai.TranscriptionConfig(language_code=self.lan_code)
	        transcriber = aai.Transcriber(config=config)
	        transcript = transcriber.transcribe(audio_path)
	        # The status must be checked before returning; previously this
	        # check sat after the return statement and could never run.
	        if transcript.status == aai.TranscriptStatus.error:
	            print(transcript.error)
	            return None
	        return transcript.text

	    def translate_text(self, transcript_text):
	        """Translate *transcript_text* from ``self.lan_code`` to ``self.tran_code``.

	        The subscription key and region are read from the
	        ``AZURE_TRANSLATOR_KEY`` / ``AZURE_TRANSLATOR_REGION`` environment
	        variables when set.

	        Returns:
	            The translated text.

	        Raises:
	            requests.HTTPError: If the service answers with an error status.
	        """
	        import os

	        base_url = "https://api.cognitive.microsofttranslator.com"
	        endpoint = "/translate"
	        headers = {
	            # NOTE(review): this fallback key is committed in source; rotate
	            # it and supply the key through the environment instead.
	            "Ocp-Apim-Subscription-Key": os.environ.get(
	                "AZURE_TRANSLATOR_KEY", "cd226bb1f3644276bea01d82dd861cbb"
	            ),
	            "Content-Type": "application/json",
	            "Ocp-Apim-Subscription-Region": os.environ.get(
	                "AZURE_TRANSLATOR_REGION", "southeastasia"
	            ),
	        }
	        params = {
	            "api-version": "3.0",
	            "from": self.lan_code,
	            "to": self.tran_code,
	        }
	        body = [{"text": transcript_text}]
	        response = requests.post(
	            base_url + endpoint,
	            headers=headers,
	            params=params,
	            json=body,
	            timeout=self.TRANSLATE_TIMEOUT,  # never hang forever on the network
	        )
	        response.raise_for_status()
	        return response.json()[0]["translations"][0]["text"]

	    def generate_audio(self, translated_text):
	        """Synthesize *translated_text* with the module-level ``tts`` model.

	        The audio written by ``extract_audio`` is passed as ``speaker_wav``.

	        Returns:
	            The path of the synthesized WAV file.
	        """
	        tts.tts_to_file(
	            translated_text,
	            speaker_wav=self.EXTRACTED_AUDIO_PATH,
	            file_path=self.SYNTH_AUDIO_PATH,
	            language=self.tran_code,
	        )
	        return self.SYNTH_AUDIO_PATH

	    def translate_video(self):
	        """Run the whole pipeline and return the path of the translated video.

	        Raises:
	            ValueError: If the target language is not in ``LANGUAGE_CODES``.
	            RuntimeError: If transcription fails.
	            subprocess.CalledProcessError: If Wav2Lip inference exits non-zero.
	        """
	        import os

	        self.org_language_parameters(self.original_language)
	        self.target_language_parameters(self.target_language)
	        # Fail early: without a target code neither the translation request
	        # nor speech synthesis can be given a language.
	        if not self.tran_code:
	            raise ValueError(
	                f"Unsupported target language: {self.target_language!r}"
	            )

	        audio_path = self.extract_audio()
	        transcript_text = self.transcribe_audio(audio_path)
	        if transcript_text is None:
	            raise RuntimeError("Transcription failed; see the error printed above.")
	        translated_text = self.translate_text(transcript_text)
	        translated_audio_path = self.generate_audio(translated_text)

	        # Detection box padding: if the box does not sit quite right, adjust
	        # these values a bit. Usually the bottom one is the biggest issue.
	        pad_top = 0
	        pad_bottom = 15
	        pad_left = 0
	        pad_right = 0
	        rescale_factor = 1

	        # Generate the video. The script runs with WAV2LIP_DIR as its working
	        # directory (so the relative script/checkpoint paths resolve) without
	        # changing this process's directory; inputs are made absolute so they
	        # resolve from there. An argument list keeps the paths out of a shell.
	        command = [
	            "python", "inference.py",
	            "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
	            "--face", os.path.abspath(self.video_path),
	            "--audio", os.path.abspath(translated_audio_path),
	            "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right),
	            "--resize_factor", str(rescale_factor),
	            "--nosmooth",
	            "--outfile", self.OUTPUT_VIDEO_PATH,
	        ]
	        subprocess.run(command, cwd=self.WAV2LIP_DIR, check=True)
	        return self.OUTPUT_VIDEO_PATH  # path to the translated video file