FrexG's picture
fix typo
d60316b
raw
history blame contribute delete
No virus
1.08 kB
import gradio as gr
import torch
import torchaudio
import torchaudio.functional as AF
from pydub import AudioSegment
from asr import Transcribe
def transcribe(audio_file, language: str):
    """Transcribe an uploaded audio file in the selected language.

    The audio is decoded with pydub, downmixed to one channel, peak-normalised,
    resampled to 16 kHz when needed, and handed to the module-level
    ``transcriber`` together with the language code.

    Args:
        audio_file: The uploaded file, either a path string or an object that
            exposes its path through a ``name`` attribute.
        language: Display name of the language: "Amharic", "Oromo" or
            "Somali".

    Returns:
        A ``(transcript, path)`` tuple: whatever ``transcriber`` returns for
        the waveform and language code, and the path of the input file.

    Raises:
        gr.Error: If no file was uploaded, the language is not one of the
            supported names, or the decoded audio contains no samples.
    """
    language_dict = {"Amharic": "amh", "Oromo": "orm", "Somali": "som"}
    freq = 16000

    if audio_file is None:
        raise gr.Error("Please upload an audio file.")
    if language not in language_dict:
        raise gr.Error("Please select a supported language.")

    # Accept both a file-like wrapper (with `.name`) and a plain path string.
    audio_path = getattr(audio_file, "name", audio_file)

    # Load the audio file. Multi-channel audio yields interleaved samples, so
    # downmix to mono first; otherwise the flat sample array would be treated
    # as a single channel that is `channels` times too long.
    audio = AudioSegment.from_file(audio_path).set_channels(1)
    orig_freq = audio.frame_rate
    waveform = torch.tensor(audio.get_array_of_samples(), dtype=torch.float32)
    if waveform.numel() == 0:
        raise gr.Error("The audio file contains no samples.")

    # Normalise by the absolute peak so the result lies in [-1, 1]; skip the
    # division for an all-zero (silent) clip to avoid producing NaNs.
    peak = waveform.abs().max()
    if peak > 0:
        waveform = waveform / peak
    # Add a leading channel dimension: (num_samples,) -> (1, num_samples).
    waveform = waveform.unsqueeze(0)

    # Resample the audio to 16 kHz.
    if orig_freq != freq:
        waveform = AF.resample(waveform, orig_freq, freq)

    return transcriber(waveform, language_dict[language]), audio_path
if __name__ == "__main__":
    # `transcribe` looks up this module-level name at call time, so it must be
    # bound before the interface starts serving requests.
    transcriber = Transcribe()

    inputs = [
        gr.File(label="Audio file"),
        # Preselect a language so a submission without an explicit choice does
        # not hand `None` to `transcribe`.
        gr.Dropdown(
            choices=["Amharic", "Oromo", "Somali"],
            value="Amharic",
            label="Language",
        ),
    ]
    outputs = [
        gr.Textbox(label="Transcript"),
        gr.Audio(label="Audio", type="filepath"),
    ]

    app = gr.Interface(transcribe, inputs=inputs, outputs=outputs)
    app.launch()