# Gradio Space: English speech transcription + translation demo.
# (Hugging Face file-viewer residue — status badges, file size, commit
# hashes, and line-number gutter — removed from the top of this file.)
import gradio as gr
import torch
from transformers import pipeline
# Load the ASR and text-translation pipelines once at import time so the
# models are downloaded/initialized before the Gradio app serves requests.
# distil-whisper/distil-small.en is an English-only speech-recognition model.
asr_pipeline = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
# NLLB-200 (distilled, 600M params) is a many-to-many translation model;
# source/target languages are selected per call via src_lang/tgt_lang.
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")
def transcribe_speech(filepath, target_language):
    """Transcribe an audio file to English text, then translate it.

    Args:
        filepath: Path to the recorded/uploaded audio file, or None/"" when
            the Gradio Audio component supplied no audio.
        target_language: FLORES-200 language code (e.g. "ben_Beng") passed
            to the NLLB translation pipeline as ``tgt_lang``.

    Returns:
        A ``(transcription, translation)`` tuple of strings — one value per
        output Textbox.
    """
    if not filepath:
        # Bug fix: the second element used to be a list ([]), which does not
        # match the Textbox output component; return an empty string instead.
        return "No audio found, please retry.", ""
    # Speech -> English text (the distil-whisper checkpoint is English-only).
    output = asr_pipeline(filepath)
    transcription = output["text"]
    # English text -> target language via NLLB-200.
    text_translated = translator(transcription, src_lang="eng_Latn", tgt_lang=target_language)
    return transcription, text_translated[0]['translation_text']
# Interface for live microphone recordings.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Dropdown(
            label="Target Language",
            choices=["ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn"],
            value="ben_Beng",
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5),
    ],
    allow_flagging="never",
)
# Interface for uploaded audio files (same callback and components as the
# microphone tab, but sourced from an upload widget).
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources="upload", type="filepath"),
        gr.Dropdown(
            label="Target Language",
            choices=["ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn"],
            value="ben_Beng",
        ),
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5),
    ],
    allow_flagging="never",
)
# Choices are presented as the model expects. The language codes are provided in "Languages in FLORES-200" in the link below
#https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200
# Create the demo with tabbed interfaces (one tab per audio input source).
demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the Gradio demo only when run as a script (not on import).
# Fixes from the scraped source: restored the suite indentation under
# `with` and `if`, and removed a stray " |" residue after demo.launch().
if __name__ == "__main__":
    demo.launch()