import gradio as gr
import torch
import torchaudio
import torchaudio.functional as AF
from pydub import AudioSegment

from asr import Transcribe

# UI language label -> ASR model language code.
LANGUAGE_CODES = {"Amharic": "amh", "Oromo": "orm", "Somali": "som"}
# Sampling rate (Hz) the ASR model expects.
TARGET_FREQ = 16000


def transcribe(audio_file, language: str):
    """Transcribe an uploaded audio file in the selected language.

    Args:
        audio_file: uploaded file object from ``gr.File``; ``.name`` is the
            path on disk (pydub dispatches on the file contents/extension).
        language: one of "Amharic", "Oromo", "Somali" (keys of LANGUAGE_CODES).

    Returns:
        Tuple of (transcript string from the global ``transcriber``,
        path to the original audio file for playback in the UI).
    """
    # Load the audio file and pull out the raw interleaved samples.
    audio = AudioSegment.from_file(audio_file.name)
    orig_freq = audio.frame_rate

    # Convert integer PCM samples to float32 and peak-normalize to [-1, 1].
    # Using abs().max() (not max()) handles signed audio correctly: the
    # signed max can be negative (sign flip) or smaller than the negative
    # peak (values below -1 after division).
    waveform = torch.tensor(audio.get_array_of_samples(), dtype=torch.float32)
    peak = waveform.abs().max()
    if peak > 0:  # guard: a silent clip would otherwise divide by zero (NaN)
        waveform = waveform / peak
    waveform = waveform.unsqueeze(0)  # add channel dim: (1, num_samples)

    # Resample to the 16 kHz rate the model expects.
    if orig_freq != TARGET_FREQ:
        waveform = AF.resample(waveform, orig_freq, TARGET_FREQ)

    # ``transcriber`` is created in the __main__ guard below before the
    # gradio app starts serving requests.
    return transcriber(waveform, LANGUAGE_CODES[language]), audio_file.name


if __name__ == "__main__":
    transcriber = Transcribe()
    inputs = [gr.File(), gr.Dropdown(choices=["Amharic", "Oromo", "Somali"])]
    outputs = [
        gr.Textbox(label="Transcript"),
        gr.Audio(label="Audio", type="filepath"),
    ]
    app = gr.Interface(transcribe, inputs=inputs, outputs=outputs)
    app.launch()