File size: 1,081 Bytes
86ff979
 
 
 
4bb6cd4
d80a989
86ff979
 
6b562eb
 
86ff979
 
 
 
4bb6cd4
 
 
 
86ff979
 
 
 
d60316b
86ff979
 
 
6b562eb
86ff979
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
import torch
import torchaudio
import torchaudio.functional as AF
from pydub import AudioSegment
from asr import Transcribe


def transcribe(audio_file, language: str):
    """Transcribe an uploaded audio file in the selected language.

    The file is decoded with pydub, mixed down to a single channel,
    peak-normalised, resampled to 16 kHz when needed and then handed to the
    module-level ``transcriber`` callable.

    Args:
        audio_file: The upload coming from the Gradio ``File`` input. Either
            an object exposing the file path as ``.name`` or a plain path
            string.
        language: Display name of the language; one of ``"Amharic"``,
            ``"Oromo"`` or ``"Somali"``.

    Returns:
        A ``(transcript, path)`` tuple: whatever ``transcriber`` returns for
        the prepared waveform, and the path of the input file (fed to the
        audio output component).

    Raises:
        KeyError: If ``language`` is not one of the supported names.
        ValueError: If the decoded audio contains no samples.
    """
    language_dict = {"Amharic": "amh", "Oromo": "orm", "Somali": "som"}
    freq = 16000

    # Resolve the language first so an invalid selection fails before the
    # (comparatively expensive) decoding work is done.
    language_code = language_dict[language]

    # Accept both a tempfile-like upload object (path in ``.name``) and a
    # plain path string.
    audio_path = getattr(audio_file, "name", audio_file)

    # Load the audio file.
    audio = AudioSegment.from_file(audio_path)
    if audio.channels > 1:
        # get_array_of_samples() interleaves channels; without a mixdown a
        # stereo file would be read as one mono signal of twice the length.
        audio = audio.set_channels(1)
    orig_freq = audio.frame_rate

    # Convert to float before taking abs(): abs() of the most negative
    # integer sample would overflow in the native integer sample width.
    waveform = torch.tensor(audio.get_array_of_samples(), dtype=torch.float32)
    if waveform.numel() == 0:
        raise ValueError("The audio file contains no samples.")

    # Peak-normalise into [-1, 1]. Silent audio (peak == 0) is left as zeros
    # instead of being divided by zero into NaNs.
    peak = waveform.abs().max()
    if peak > 0:
        waveform = waveform / peak
    waveform = waveform.unsqueeze(0)

    # Resample the audio to 16 kHz.
    if orig_freq != freq:
        waveform = AF.resample(waveform, orig_freq, freq)

    return transcriber(waveform, language_code), audio_path

if __name__ == "__main__":
    # Bound at module level so that transcribe() can look it up as a global.
    transcriber = Transcribe()

    # Inputs: the uploaded file and the language selector.
    # Outputs: the transcript text and the audio referenced by file path.
    app = gr.Interface(
        transcribe,
        inputs=[
            gr.File(),
            gr.Dropdown(choices=["Amharic", "Oromo", "Somali"]),
        ],
        outputs=[
            gr.Textbox(label="Transcript"),
            gr.Audio(label="Audio", type="filepath"),
        ],
    )
    app.launch()