|
import os |
|
from huggingface_hub import notebook_login |
|
import gradio as gr |
|
from transformers import pipeline |
|
from deep_translator import GoogleTranslator |
|
from gtts import gTTS |
|
import shutil |
|
from pytube import YouTube |
|
|
|
|
|
# Module-level speech-recognition pipeline, built once at import time and
# reused by every request. It loads the `nelanbu/ID2223_Lab2_Whisper`
# checkpoint and lets the library choose device placement (device_map="auto").
pipe = pipeline(
    task="automatic-speech-recognition",
    model="nelanbu/ID2223_Lab2_Whisper",
    device_map="auto",
    batch_size=16,
    # Long recordings are processed in 30 s chunks; the stride tuple is the
    # (left, right) overlap in seconds applied to each chunk.
    chunk_length_s=30,
    stride_length_s=(4, 2),
)
|
|
|
# Language label -> language code handed to both GoogleTranslator and gTTS.
# Both spellings of the Chinese option are accepted so the lookup succeeds
# whichever label the caller passes.
# NOTE(review): recent deep_translator / gTTS releases appear to list
# Simplified Chinese as 'zh-CN' -- confirm the lowercase code is accepted by
# the installed versions.
_LANGUAGE_CODES = {
    "english": "en",
    "swedish": "sv",
    "italian": "it",
    "german": "de",
    "spanish": "es",
    "french": "fr",
    "danish": "da",
    "russian": "ru",
    "japanese": "ja",
    "chinese": "zh-cn",
    "chinese (simplified)": "zh-cn",
}


def transcribe(audio_input, link_input, lang):
    """Transcribe audio, translate the transcript and synthesise it as speech.

    Args:
        audio_input: Path of a recorded or uploaded audio file; ignored when
            ``link_input`` is non-empty.
        link_input: YouTube URL. When truthy it takes precedence over
            ``audio_input``; the audio-only stream that sorts first by
            descending ``abr`` is downloaded and transcribed.
        lang: Target language label; must be a key of ``_LANGUAGE_CODES``.

    Returns:
        A ``(text, translated_text, output_audio_path)`` tuple: the raw
        transcript, its translation, and the path of the MP3 produced by gTTS.

    Raises:
        ValueError: If ``lang`` is not a supported label, if neither an audio
            file nor a URL was supplied, or if the video exposes no
            audio-only stream.
        Exception: Any error raised while downloading, transcribing,
            translating or synthesising is printed and re-raised unchanged.
    """
    try:
        # Validate the cheap inputs first so a bad request fails before any
        # download or model inference is started.
        target_lang = _LANGUAGE_CODES.get(lang)
        if target_lang is None:
            supported = ", ".join(sorted(_LANGUAGE_CODES))
            raise ValueError(
                f"Unsupported target language {lang!r}; choose one of: {supported}"
            )

        if link_input:
            audio_streams = (
                YouTube(link_input)
                .streams.filter(only_audio=True)
                .order_by("abr")
                .desc()
            )
            best_audio = audio_streams.first()
            if best_audio is None:
                raise ValueError(f"No audio stream found for {link_input!r}")
            audio = best_audio.download()
        elif audio_input:
            audio = audio_input
        else:
            raise ValueError("Provide either an audio file or a YouTube URL.")

        result = pipe(audio)
        text = result["text"]
        print(f"Transcribed text: {text}")

        translator = GoogleTranslator(source="auto", target=target_lang)
        translated_text = translator.translate(text)
        print(f"Translated text: {translated_text}")

        tts = gTTS(text=translated_text, tld="com", slow=False, lang=target_lang)
        # The spoken translation is always written to this same relative path.
        output_audio_path = "test2.mp3"
        tts.save(output_audio_path)
        print(f"Saved TTS audio to {output_audio_path}")

        return text, translated_text, output_audio_path
    except Exception as e:
        # Log for the server console, then let the caller see the failure.
        print(f"An error occurred: {e}")
        raise
|
|
|
# Gradio UI: three inputs (audio file/microphone, YouTube URL, target
# language) wired to `transcribe`, whose three return values fill the
# transcript, translation and synthesised-audio outputs in that order.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Translate from microphone/MP3 input"),
        gr.Text(max_lines=1, label="Translate from YouTube URL"),
        gr.Dropdown(
            # Labels must match the language names `transcribe` recognises;
            # the Chinese entry uses the exact "chinese (simplified)" spelling
            # that function checks for.
            [
                "english",
                "swedish",
                "italian",
                "german",
                "spanish",
                "french",
                "danish",
                "russian",
                "japanese",
                "chinese (simplified)",
            ],
            label="Language",
            info="pick the language you want to translate your audio",
        ),
    ],
    outputs=[
        gr.Text(label="transcribed text"),
        gr.Text(label="translated text"),
        gr.Audio(type="filepath", label="translated audio"),
    ],
    title="Turkish Audio Translator",
    description="You can upload YouTube link of a video in Turkish or use the microphone to record your voice or upload an MP3 file to translate Turkish audio to other languages.",
)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)