Spaces:
Build error
Build error
File size: 6,063 Bytes
2355e92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import torch
from TTS.api import TTS

# Load the multilingual XTTS v2 model once at import time; `generate_audio`
# uses this module-level `tts` object for every request.
# The device is chosen at runtime so the script still starts on a CPU-only
# host instead of crashing on a hard-coded "cuda". The `gpu=True` constructor
# flag is dropped because `.to(device)` already performs the placement.
_tts_device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(_tts_device)
# Dependencies
# One-time environment setup: clone Wav2Lip into /content, install its
# requirements, and download the pretrained weights into the checkout.
# Written with os/subprocess instead of IPython `%`/`!` magics so the file is
# valid Python both inside and outside a notebook.
import locale
import os
import subprocess
import sys

os.chdir("/content/")

# NOTE(review): presumably a workaround for hosts whose preferred encoding is
# not UTF-8 - confirm it is still needed. The replacement accepts (and
# ignores) arguments because `locale.getpreferredencoding` takes an optional
# `do_setlocale` parameter that callers may pass.
locale.getpreferredencoding = lambda *_args, **_kwargs: "UTF-8"

# `git clone` refuses to write into an existing non-empty directory, so skip
# it when the checkout is already there (e.g. when this cell is re-run).
if not os.path.isdir("Wav2Lip"):
    subprocess.run(
        ["git", "clone", "https://github.com/justinjohn0306/Wav2Lip"],
        check=True,
    )

# Install with the running interpreter's pip so the packages land in the
# environment that will import them.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-r", "requirements_colab.txt"],
    cwd="Wav2Lip",
    check=True,
)

# The destination paths below are relative to the Wav2Lip checkout; the
# working directory is left there afterwards, as before.
os.chdir("/content/Wav2Lip")

for _url, _destination in (
    (
        "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
        "face_detection/detection/sfd/s3fd.pth",
    ),
    (
        "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
        "checkpoints/wav2lip.pth",
    ),
    (
        "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth",
        "checkpoints/wav2lip_gan.pth",
    ),
    (
        "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth",
        "checkpoints/resnet50.pth",
    ),
    (
        "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth",
        "checkpoints/mobilenet.pth",
    ),
):
    # check=True: a failed download would otherwise leave an empty weight
    # file behind and only surface later as an obscure model-loading error.
    subprocess.run(["wget", _url, "-O", _destination], check=True)
import subprocess
import assemblyai as aai
import requests
import moviepy.editor as mp
class translation:
    """Translate the speech of a video and lip-sync the video to the result.

    The pipeline (see ``translate_video``) extracts the audio track with
    moviepy, transcribes it with AssemblyAI, translates the transcript with
    the Microsoft Translator REST API, synthesizes the translated text with
    the module-level ``tts`` model using the extracted audio as the speaker
    reference, and finally runs Wav2Lip's ``inference.py`` on the video and
    the synthesized audio.
    """

    # Display name -> two-letter language code, shared by source and target.
    _LANGUAGE_CODES = {
        "English": "en",
        "German": "de",
        "French": "fr",
        "Spanish": "es",
    }

    _WAV2LIP_DIR = "/content/Wav2Lip"
    _OUTPUT_VIDEO = "/content/output_video.mp4"

    # Face-detection box padding (top, bottom, left, right). If the box does
    # not sit quite right, adjust the values a bit; usually the bottom one is
    # the biggest issue.
    _PADS = (0, 15, 0, 0)
    _RESIZE_FACTOR = 1

    def __init__(self, video_path, original_language, target_language):
        """Store the job parameters and resolve both language codes.

        Args:
            video_path: Path of the input video.
            original_language: Display name of the spoken language
                ("English", "German", "French" or "Spanish").
            target_language: Display name of the language to translate to.
        """
        self.video_path = video_path
        self.original_language = original_language
        self.target_language = target_language
        # Resolve the codes up front so the individual steps can also be
        # called on their own, without going through translate_video().
        self.org_language_parameters(original_language)
        self.target_language_parameters(target_language)

    def org_language_parameters(self, original_language):
        """Set ``self.lan_code`` for *original_language* ('' if unknown)."""
        self.lan_code = self._LANGUAGE_CODES.get(original_language, '')

    def target_language_parameters(self, target_language):
        """Set ``self.tran_code`` for *target_language* ('' if unknown)."""
        self.tran_code = self._LANGUAGE_CODES.get(target_language, '')

    def extract_audio(self):
        """Write the video's audio track to ``output_audio.wav``.

        Returns:
            The path of the written audio file.

        Raises:
            ValueError: If the video has no audio track.
        """
        audio_path = "output_audio.wav"
        # The context manager closes the clip's readers even when writing
        # fails; the original never closed them.
        with mp.VideoFileClip(self.video_path) as video:
            audio = video.audio
            if audio is None:
                raise ValueError(
                    f"{self.video_path!r} has no audio track to translate."
                )
            audio.write_audiofile(audio_path)
        print("Audio extracted successfully!")
        return audio_path

    def transcribe_audio(self, audio_path):
        """Transcribe *audio_path* with AssemblyAI in the source language.

        Returns:
            The transcript text, or ``None`` when AssemblyAI reports an error
            (the error is printed).
        """
        import os

        # NOTE(security): this key is committed to the repository and should
        # be rotated. Set ASSEMBLYAI_API_KEY to override it; the literal is
        # kept only as a fallback so existing deployments keep working.
        aai.settings.api_key = os.environ.get(
            "ASSEMBLYAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2"
        )
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcript = aai.Transcriber(config=config).transcribe(audio_path)
        # Check the status BEFORE returning: the original returned first,
        # which left this error handling unreachable.
        if transcript.status == aai.TranscriptStatus.error:
            print(transcript.error)
            return None
        return transcript.text

    def translate_text(self, transcript_text):
        """Translate *transcript_text* with the Microsoft Translator API.

        Returns:
            The translated text.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        import os

        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        # NOTE(security): this subscription key is committed to the
        # repository and should be rotated. AZURE_TRANSLATOR_KEY and
        # AZURE_TRANSLATOR_REGION override the fallback literals.
        headers = {
            "Ocp-Apim-Subscription-Key": os.environ.get(
                "AZURE_TRANSLATOR_KEY", "cd226bb1f3644276bea01d82dd861cbb"
            ),
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": os.environ.get(
                "AZURE_TRANSLATOR_REGION", "southeastasia"
            ),
        }
        params = {
            "api-version": "3.0",
            "from": self.lan_code,
            "to": self.tran_code,
        }
        body = [{"text": transcript_text}]
        # A timeout keeps a stalled connection from hanging the request
        # forever; requests applies none by default.
        response = requests.post(
            base_url + endpoint,
            headers=headers,
            params=params,
            json=body,
            timeout=60,
        )
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def generate_audio(self, translated_text, speaker_wav="output_audio.wav"):
        """Synthesize *translated_text* to ``output_synth.wav``.

        Args:
            translated_text: Text to speak, in the target language.
            speaker_wav: Reference recording passed to the TTS model as
                ``speaker_wav``. Defaults to the file ``extract_audio``
                writes.

        Returns:
            The path of the synthesized audio file.
        """
        output_path = "output_synth.wav"
        tts.tts_to_file(
            translated_text,
            speaker_wav=speaker_wav,
            file_path=output_path,
            language=self.tran_code,
        )
        return output_path

    def translate_video(self):
        """Run the whole pipeline and return the translated video's path.

        Raises:
            RuntimeError: If transcription failed or produced no text.
            subprocess.CalledProcessError: If Wav2Lip inference fails.
        """
        import os
        import sys

        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.target_language_parameters(self.target_language)
        transcript_text = self.transcribe_audio(audio_path)
        if not transcript_text:
            # Fail here with a clear message instead of sending an empty
            # payload to the translation service.
            raise RuntimeError(
                "Transcription failed or produced no text; "
                "cannot translate the video."
            )
        translated_text = self.translate_text(transcript_text)
        translated_audio_path = self.generate_audio(
            translated_text, speaker_wav=audio_path
        )

        # Generate video.
        # Pass an argument list (no shell) so an uploaded file name with
        # spaces or quotes cannot break or inject into the command, and use
        # absolute paths because inference runs with Wav2Lip as its cwd.
        command = [
            sys.executable,
            "inference.py",
            "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
            "--face", os.path.abspath(self.video_path),
            "--audio", os.path.abspath(translated_audio_path),
            "--pads", *map(str, self._PADS),
            "--resize_factor", str(self._RESIZE_FACTOR),
            "--nosmooth",
            "--outfile", self._OUTPUT_VIDEO,
        ]
        # check=True: do not return the path of a video that was never made.
        subprocess.run(command, cwd=self._WAV2LIP_DIR, check=True)
        return self._OUTPUT_VIDEO  # Path to the translated video file
# return '/content/output_video.mp4', open('/content/output_video.mp4', 'rb') # Return the path and file object of the translated video file
# from translator import translation # Import the Translator class from translate module
import gradio as gr
import os
from google.colab import files
def app(video_path, original_language, target_language):
    """Gradio callback: translate an uploaded video and return the result.

    Args:
        video_path: Path of the uploaded video file.
        original_language: Display name of the language spoken in the video.
        target_language: Display name of the language to translate to.

    Returns:
        The path of the translated video, as returned by
        ``translation.translate_video``.

    Raises:
        ValueError: If no video was provided.
    """
    import shutil

    if not video_path:
        raise ValueError("No video file was provided.")

    # The pipeline reads and writes paths relative to /content.
    os.chdir("/content/")
    video_name = os.path.basename(video_path)

    # Save the uploaded file to the content folder. Skip the copy when the
    # upload already is that file: opening it for writing first (as the
    # original did) would truncate it before it could be read.
    if os.path.realpath(video_path) != os.path.realpath(video_name):
        shutil.copyfile(video_path, video_name)

    translator = translation(video_name, original_language, target_language)
    return translator.translate_video()
# Gradio UI: one video upload plus source/target language pickers feeding
# `app`, with the translated video shown as the single output.
_video_input = gr.Video(label="Video Path")
_original_language_input = gr.Dropdown(
    ["English", "German", "French", "Spanish"],
    label="Original Language",
)
_target_language_input = gr.Dropdown(
    ["English", "German", "French", "Spanish"],
    label="Targeted Language",
)
_video_output = gr.Video(label="Translated Video")

interface_video_file = gr.Interface(
    fn=app,
    inputs=[_video_input, _original_language_input, _target_language_input],
    outputs=_video_output,
)

# debug=True is passed through to launch() unchanged.
interface_video_file.launch(debug=True)
|