# pip install transformers torch gradio

from transformers import pipeline
import gradio as gr

# Whisper performs "automatic-speech-recognition"; pipeline() infers the
# task from the model, so it does not need to be passed explicitly.
transcriber = pipeline(model="openai/whisper-base")

# English -> Japanese translation model
en_jp_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-jap")

# Quick smoke test for the transcriber:
# transcriber("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac")


def transcribe_audio(audio):
    """Transcribe a mic recording or an uploaded audio file to English text."""
    if audio is None:
        return "You must provide a mic recording or an audio file"
    return transcriber(audio)["text"]

def translate_text(transcription):
    """Translate English text to Japanese."""
    return en_jp_translator(transcription)[0]["translation_text"]
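
# For reference, the raw pipeline outputs look like this (standard
# transformers pipeline API; the actual strings depend on the input):
#   transcriber(audio)     -> {"text": "..."}
#   en_jp_translator(text) -> [{"translation_text": "..."}]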

def combined_function(audio):
    """Transcribe the audio, then translate the transcription, in one step."""
    transcription = transcribe_audio(audio)
    return transcription, translate_text(transcription)

with gr.Blocks() as demo:
    # type="filepath" hands the recorded/uploaded audio to the handler as a path on disk
    audio_file = gr.Audio(type="filepath")
    text = gr.Textbox(label="Transcription")
    translate = gr.Textbox(label="Translation")

    b1 = gr.Button("Recognize Speech")
    b2 = gr.Button("Translate")

    b1.click(transcribe_audio, inputs=audio_file, outputs=text)
    b2.click(translate_text, inputs=text, outputs=translate)

    # Alternative: a single button that does both steps in one click:
    # b1 = gr.Button("Recognize Speech & Translate")
    # b1.click(combined_function, inputs=audio_file, outputs=[text, translate])

demo.launch()
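
# launch() serves the app on a local URL (default http://127.0.0.1:7860).
# Pass share=True for a temporary public link: demo.launch(share=True).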