import gradio as gr
import torch
from transformers import pipeline
# Load the ASR and text-translation pipelines
asr_pipeline = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")
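# Note (assumption, not part of the original script): transformers pipelines run on
# the CPU by default; the torch import above can be used to pick a GPU when one is
# available. A minimal sketch, commented out so the script's behaviour is unchanged:
#
#   device = 0 if torch.cuda.is_available() else -1
#   asr_pipeline = pipeline("automatic-speech-recognition",
#                           model="distil-whisper/distil-small.en", device=device)
#   translator = pipeline(task="translation",
#                         model="facebook/nllb-200-distilled-600M", device=device)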
# Define a function that transcribes speech and translates it: the ASR pipeline
# returns the recognised text, which is then fed to the translation pipeline.
# (A commented usage sketch follows the function definition.)

def transcribe_speech(filepath, target_language):
    if not filepath:
        return "No audio found, please retry.", ""

    # Transcribe the speech
    output = asr_pipeline(filepath)
    transcription = output["text"]

    # Translate the transcribed text
    text_translated = translator(transcription, src_lang="eng_Latn", tgt_lang=target_language)

    return transcription, text_translated[0]['translation_text']
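
# Usage sketch (hedged): "sample.wav" is a hypothetical local recording, and
# "ben_Beng" is the FLORES-200 code for Bengali. Calling the function directly
# returns a (transcription, translation) tuple of strings:
#
#   transcription, translation = transcribe_speech("sample.wav", "ben_Beng")
#   print(transcription)  # English text recognised by distil-whisper
#   print(translation)    # NLLB-200 translation into the target language
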
# Gradio interfaces for microphone and file upload
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Dropdown(label="Target Language", choices=["ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn"], value="ben_Beng")
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5)
    ],
    allow_flagging="never"
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=[
        gr.Audio(sources="upload", type="filepath"),
        gr.Dropdown(label="Target Language", choices=["ben_Beng", "hin_Deva", "fra_Latn", "spa_Latn", "deu_Latn"], value="ben_Beng")
    ],
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.Textbox(label="Translation", lines=5)
    ],
    allow_flagging="never"
)

# The dropdown choices are given exactly as the model expects them. The language
# codes are listed under "Languages in FLORES-200" at the link below:
# https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200
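# Illustrative example (assumption, commented out): translating to French with the
# FLORES-200 code "fra_Latn"; the pipeline returns a list with one dict holding the
# 'translation_text' key.
#
#   translator("How are you today?", src_lang="eng_Latn", tgt_lang="fra_Latn")
#   # -> [{'translation_text': '...'}]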

# Create the demo with tabbed interfaces
demo = gr.Blocks()

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the Gradio demo
if __name__ == "__main__":
    demo.launch()