|
import gradio as gr |
|
import numpy as np |
|
from transformers import pipeline |
|
import torch |
|
|
|
# Run inference on the first CUDA GPU when PyTorch reports one; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Automatic-speech-recognition pipeline built on the
# "mahimairaja/whisper-base-tamil" checkpoint, configured with a
# 15-second chunk length and placed on the device chosen above.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="mahimairaja/whisper-base-tamil",
    chunk_length_s=15,
    device=device,
)

# Store the tokenizer's decoder prompt ids for language="ta", task="transcribe"
# on the model config so decoding is steered towards Tamil transcription.
# NOTE(review): newer transformers releases prefer passing language/task via
# generate kwargs -- confirm this config override still takes effect there.
transcriber.model.config.forced_decoder_ids = (
    transcriber.tokenizer.get_decoder_prompt_ids(language="ta", task="transcribe")
)
|
|
|
def transcribe(audio):
    """Transcribe one audio input to text with the module-level ASR pipeline.

    Args:
        audio: Input forwarded unchanged to ``transcriber``. The Gradio
            interfaces in this file are configured with ``type="filepath"``,
            so this is expected to be a path to an audio file, or ``None``
            when nothing was recorded or uploaded.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        ``audio`` is ``None``.
    """
    # Submitting the form without any audio yields None; passing that on to
    # the pipeline would raise, so report an empty transcript instead.
    if audio is None:
        return ""

    result = transcriber(audio)
    return result["text"]
|
|
|
# Heading passed as the title of both transcription interfaces.
TITLE = "ASR for ALL - Democratizing Tamil"

# Outer Blocks container for the app.
demo = gr.Blocks()

# Interface 1: record from the microphone; the callback receives a file path.
mic_audio = gr.Audio(sources="microphone", type="filepath")
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=mic_audio,
    outputs="text",
    title=TITLE,
)

# Interface 2: upload an audio file; bundled clips are offered as examples.
upload_audio = gr.Audio(sources="upload", type="filepath")
example_clips = [
    "assets/tamil-audio-01.mp3",
    "assets/tamil-audio-02.mp3",
    "assets/tamil-audio-03.mp3",
    "assets/tamil-audio-04.mp3",
]
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=upload_audio,
    outputs="text",
    examples=example_clips,
    title=TITLE,
)
|
|
|
|
|
# The two interfaces and the tab label shown for each, in matching order.
tab_interfaces = [mic_transcribe, file_transcribe]
tab_labels = ["Real Time Transcription", "Audio File"]

# Build the tabbed view inside the outer Blocks container.
with demo:
    gr.TabbedInterface(interface_list=tab_interfaces, tab_names=tab_labels)

# Start the Gradio app with default launch settings.
demo.launch()