Spaces:

capstonedubtrack
/

Indiclanguagedubbing

Running

App Files Files Community

Indiclanguagedubbing / app.py

capstonedubtrack

Update app.py

0058c00 verified 4 months ago

raw

history blame

3.66 kB

	import os
	import sys
	import gradio as gr

	# Device name for model inference. Nothing in the visible part of this file
	# reads it; it is kept in case code elsewhere does.
	device = "cuda"

	# Environment bootstrap, executed every time this module is imported: fetch the
	# Wav2Lip sources and the S3FD face-detector weights, then install the Python
	# packages used by the dubbing pipeline. os.system() return codes are ignored,
	# so a failing step does not stop the remaining ones.
	os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
	os.system('pip3 install --upgrade pip')
	os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
	os.system('pip3 install moviepy')
	# "-y" is required here: without it pip asks for an interactive confirmation,
	# which cannot be answered when this script runs unattended, so numpy would
	# never actually be removed before the reinstall below.
	os.system('pip3 uninstall -y numpy')
	os.system('pip3 install --upgrade numpy')
	os.system('pip3 install speechRecognition')
	os.system('pip3 install gtts')
	os.system('pip3 install googletrans==3.1.0a0')
	os.system('pip3 install numba==0.48')
	os.system('pip3 install transformers')

	# Text shown on the Gradio page (passed to gr.Interface at the bottom of the file).
	title = "Automatic translation and dubbing for Indic Languages"
	# The pipeline in inference() transcribes with an English speech model and
	# translates with src='en', so the input is English and the listed languages
	# are the dubbing targets, not the languages spoken in the uploaded video.
	description = "A demo application to translate and dub English-language videos into Tamil, Hindi, Bengali and Telugu"
	article = "Official Repo: https://github.com/Rudrabha/Wav2Lip"

	# Language names offered in the UI mapped to their language codes. Each code is
	# used both as the googletrans translation target and as the gTTS voice language.
	_LANGUAGE_CODES = {
	    "Hindi": "hi",
	    "Tamil": "ta",
	    "Bengali": "bn",
	    "Telugu": "te",
	}


	def inference(language, speed, voice, video):
	    """Dub an English-language video into the selected Indic language.

	    Steps: extract the audio track, transcribe it with the English wav2vec2
	    model, translate the transcript, synthesise the translation with gTTS and
	    run the Wav2Lip ``inference.py`` script to lip-sync the video to the new
	    audio.

	    Args:
	        language: Target language; one of "Hindi", "Tamil", "Bengali", "Telugu".
	        speed: "Slow" selects gTTS's slow speaking mode; any other value
	            (including "Fast") selects normal speed.
	        voice: Accepted for interface compatibility; currently not used.
	        video: Path of the video file to dub.

	    Returns:
	        The path "./results/result_voice.mp4" that the Wav2Lip script is
	        invoked to produce.

	    Raises:
	        ValueError: If the language is unsupported, no video was given, the
	            video has no audio track, or nothing could be transcribed.
	        subprocess.CalledProcessError: If the Wav2Lip script exits with a
	            non-zero status.
	    """
	    # Validate the cheap inputs first so a bad request fails before any
	    # package installation, model loading or video processing happens.
	    if language not in _LANGUAGE_CODES:
	        raise ValueError(
	            "Unsupported language {!r}; expected one of: {}".format(
	                language, ", ".join(sorted(_LANGUAGE_CODES))
	            )
	        )
	    if not video:
	        raise ValueError("No video was provided.")
	    lang_code = _LANGUAGE_CODES[language]
	    slow = speed == "Slow"

	    import subprocess
	    import moviepy.editor as mp

	    # Extract the audio track; always release the clip's file handles.
	    clip = mp.VideoFileClip(video)
	    try:
	        if clip.audio is None:
	            raise ValueError("The provided video has no audio track.")
	        clip.audio.write_audiofile(r"audio.wav")
	    finally:
	        clip.close()

	    # Runs on every request. The second command pins transformers to 4.11.3
	    # before transformers is imported below.
	    os.system('pip3 install pydub')
	    os.system('pip3 install transformers==4.11.3 soundfile sentencepiece torchaudio librosa')

	    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
	    import torch
	    import librosa

	    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
	    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")

	    def get_transcription(audio_path):
	        """Return the lower-cased transcript of the audio file at audio_path."""
	        # librosa resamples to 16 kHz while loading, so no further resampling
	        # step is needed before the processor.
	        speech, _ = librosa.load(audio_path, sr=16000)
	        input_values = processor(speech, return_tensors="pt", sampling_rate=16000)["input_values"]
	        # Inference only: skip gradient tracking.
	        with torch.no_grad():
	            logits = model(input_values)["logits"]
	        predicted_ids = torch.argmax(logits, dim=-1)
	        transcription = processor.decode(predicted_ids[0])
	        return transcription.lower()

	    text = get_transcription("audio.wav")
	    if not text.strip():
	        raise ValueError("No speech could be transcribed from the video.")

	    from googletrans import Translator
	    from gtts import gTTS

	    translation = Translator().translate(text, src='en', dest=lang_code)
	    # The speaking-speed choice belongs to the speech synthesiser, and the
	    # voice language must match the translation target.
	    tts = gTTS(translation.text, lang=lang_code, slow=slow)
	    tts.save('input_audio.wav')
	    audio = "input_audio.wav"

	    # Move the Wav2Lip sources into the working directory once; later requests
	    # find inference.py already in place.
	    if not os.path.exists("inference.py"):
	        os.system('mv ./Wav2Lip/* .')

	    # Argument list instead of a shell string: the video path comes from an
	    # upload and may contain spaces or shell metacharacters.
	    subprocess.run(
	        [
	            "python", "inference.py",
	            "--checkpoint_path", "./wav2lip_gan.pth",
	            "--face", video,
	            "--audio", audio,
	        ],
	        check=True,
	    )
	    return "./results/result_voice.mp4"

	# Input widgets, listed in the positional order of inference()'s parameters:
	# language, speed, voice, video.
	input_components = [
	    gr.Radio(["Tamil", "Hindi", "Bengali", "Telugu"], label="Enter language to translate to"),
	    gr.Radio(["Slow", "Fast"], label="Enter speaking speed"),
	    gr.Radio(["Male", "Female"], label="Enter preferred voice"),
	    gr.Video(format="mp4", sources="upload", label="Video to be Translated"),
	]

	# Build the web UI around inference(); its single output is rendered as a video.
	iface = gr.Interface(
	    inference,
	    inputs=input_components,
	    outputs=["video"],
	    title=title,
	    description=description,
	    article=article,
	)

	# Start the app; the current working directory is passed as an allowed path.
	iface.launch(allowed_paths=["."])