|
rom transformers import pipeline |
|
import gradio as gr |
|
from transformers import Wav2Vec2CTCTokenizer |
|
|
|
preTrainedTokenizer = Wav2Vec2CTCTokenizer.from_pretrained("sukantan/wav2vec2-large-xls-r-300m-or-colab", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|", task="transcribe") |
|
pipe = pipeline(model="sukantan/wav2vec2-large-xls-r-300m-or-colab", tokenizer=preTrainedTokenizer) |
|
|
|
def transcribe(audio):
    """Transcribe an audio file to Odia text.

    Parameters
    ----------
    audio : str
        Filesystem path to the recorded audio clip (Gradio passes a
        filepath because the input component uses ``type="filepath"``).

    Returns
    -------
    str
        The decoded transcription with any literal ``<s>`` markers removed
        (the model occasionally emits them as stray BOS tokens).
    """
    raw_text = pipe(audio)["text"]
    # Strip stray "<s>" sentence-start markers from the decoded output.
    return raw_text.replace("<s>", "")
|
|
|
# Build the web UI: microphone input -> transcribe() -> plain-text output.
# NOTE(review): gr.Audio(source=...) is the Gradio 3.x parameter name; Gradio
# 4.x renamed it to sources=["microphone"] — confirm the pinned Gradio version.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Wav2Vec2 Odia",
    description=(
        "Realtime demo for Odia speech recognition using a fine-tuned "
        "wav2vec2-large-xls-r-300m model."
    ),
)

# share=True creates a temporary public URL in addition to the local server.
iface.launch(share=True)