File size: 826 Bytes
e1f3556
70ccaef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d973c4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from transformers import pipeline
import gradio as gr
from transformers import Wav2Vec2CTCTokenizer

# Single identifier used to load both the tokenizer and the pipeline model.
# Point it at "your-username/the-name-you-picked" to use another checkpoint.
MODEL_ID = "sukantan/wav2vec2-large-xls-r-300m-or-colab"

# CTC tokenizer with its special tokens spelled out explicitly.
# NOTE(review): task="transcribe" is forwarded as an extra keyword argument;
# confirm the tokenizer actually needs it.
preTrainedTokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
    MODEL_ID,
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
    task="transcribe",
)

# Inference pipeline, built once at import time with the tokenizer above.
pipe = pipeline(model=MODEL_ID, tokenizer=preTrainedTokenizer)

def transcribe(audio) -> str:
    """Transcribe one audio clip and return the cleaned-up text.

    Args:
        audio: Value handed over by the audio input (configured with
            type="filepath" in this file), or None when nothing was
            recorded.

    Returns:
        The recognised text with every literal "<s>" marker removed, or an
        empty string when there is no audio to transcribe.
    """
    # Submitting the form without a recording yields None; skip the model
    # call instead of letting the pipeline fail on a missing input.
    if audio is None:
        return ""

    text = pipe(audio)["text"]
    # Drop any "<s>" markers so only the transcript itself is displayed.
    return text.replace("<s>", "")

# Fixed texts shown by the demo page.
_TITLE = "Wav2Vec2 Odia"
_DESCRIPTION = "Realtime demo for Odia speech recognition using a fine-tuned wav2vec2-large-xls-r-300m model."

# Input widget: audio captured from the microphone, with type="filepath".
_microphone = gr.Audio(source="microphone", type="filepath")

# Wire the widget to `transcribe` and show its return value as text.
iface = gr.Interface(
    fn=transcribe,
    inputs=_microphone,
    outputs="text",
    title=_TITLE,
    description=_DESCRIPTION,
)

# Start serving the interface as soon as this module is executed.
iface.launch()