Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
import gradio as gr | |
import os | |
import deepl | |
from pytube import YouTube | |
# Language code handed to DeepL as the translation target.
TARGET_LANG = "EN-GB"

# The DeepL API key comes from the environment (None when DEEPL_KEY is unset).
deepl_key = os.getenv("DEEPL_KEY")
translator = deepl.Translator(deepl_key)

# Speech-recognition pipeline used by the transcription functions below.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="FredBonux/whisper-small-it",
)
def transcribe(audio):
    """Transcribe a recorded audio clip and translate the text to English.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` / an empty
            string when nothing was recorded.

    Returns:
        A ``(transcription, translation)`` tuple of strings. Both are empty
        when no audio was supplied; the translation is empty when the
        transcription contains no text.
    """
    # Nothing was recorded: return empty results instead of failing inside
    # the speech-recognition pipeline.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return "", ""

    ita = pipe(audio)["text"]

    # The DeepL client rejects empty text, so skip the API call when the
    # clip produced no transcription (e.g. silence).
    if not ita.strip():
        return ita, ""

    eng = translator.translate_text(ita, target_lang=TARGET_LANG).text
    print(f"{ita} -> {eng}")
    return ita, eng
def transcribe_url(url):
    """Transcribe the audio of a YouTube video and translate it to English.

    The first audio-only stream of the video is downloaded into the
    ``yt_video`` directory, transcribed, and the downloaded file is removed
    again once the transcription has finished.

    Args:
        url: Address of the YouTube video; converted with ``str`` before use.

    Returns:
        A ``(transcription, translation)`` tuple of strings. Both are empty
        when no URL was supplied; the translation is empty when the
        transcription contains no text.

    Raises:
        gr.Error: If the video offers no audio-only stream.
    """
    # An empty text box yields no URL; there is nothing to download.
    if url is None or not str(url).strip():
        return "", ""

    youtube = YouTube(str(url).strip())
    print("Downloading video")
    stream = youtube.streams.filter(only_audio=True).first()
    # first() returns None when the filtered query is empty.
    if stream is None:
        raise gr.Error("No audio stream is available for this video.")
    audio = stream.download('yt_video')
    print("Downloaded")

    try:
        text_it = pipe(audio)["text"]
    finally:
        # Best-effort cleanup so downloads do not pile up on disk; a failure
        # to delete must not hide the transcription result or error.
        try:
            os.remove(audio)
        except OSError:
            pass
    print(f"{text_it}")

    # The DeepL client rejects empty text, so skip the API call when the
    # video produced no transcription.
    if not text_it.strip():
        return text_it, ""

    text_en = translator.translate_text(text_it, target_lang=TARGET_LANG).text
    print(f"{text_en}")
    return text_it, text_en
# Interface that takes a video URL as text and shows the transcription and
# its English translation in two text boxes.
url_demo = gr.Interface(
    title="Italian video to english text",
    description="Transcribing italian video to text and translating it to english!",
    fn=transcribe_url,
    inputs="text",
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
)
# Interface that records from the microphone (passed to the handler as a file
# path) and shows the transcription and its English translation.
voice_demo = gr.Interface(
    title="Italian recorded speech to english text",
    description="Transcribing italian speech to text and translating it to english!",
    fn=transcribe,
    inputs=gr.Audio(type="filepath", sources=["microphone"]),
    outputs=[
        gr.Textbox(label="Transcribed text"),
        gr.Textbox(label="English translation"),
    ],
)
# Combine both interfaces into one tabbed app; the tab titles are given in the
# same order as the interfaces.
app = gr.TabbedInterface(
    [url_demo, voice_demo],
    ["Video to English Text", "Audio to English Text"],
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    app.launch()