Wave2Vec_Kyrgyz / app.py
Simonlob's picture
Update app.py
2e3940c verified
raw
history blame
No virus
974 Bytes
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import numpy as np
import re
# Load the feature extractor/tokenizer pair and the CTC acoustic model once at
# import time so every request reuses the same in-memory weights.
processor = Wav2Vec2Processor.from_pretrained("adilism/wav2vec2-large-xlsr-kyrgyz")
model = Wav2Vec2ForCTC.from_pretrained("adilism/wav2vec2-large-xlsr-kyrgyz")
# Inference runs on the default (CPU) device. To use a GPU, enable the line
# below and move the input tensors to the same device before the forward pass.
# model.to("cuda")
def transcribe(file_):
    """Transcribe an audio file to text with the loaded Wav2Vec2 CTC model.

    Args:
        file_: Path to the audio file, as delivered by the ``gr.Audio``
            input configured with ``type="filepath"``. ``None`` (no audio
            supplied) produces an empty transcription instead of an error.

    Returns:
        The decoded transcription as a single string.
    """
    if file_ is None:
        return ""

    # The same rate is used for loading and for the processor call so the
    # waveform and the declared sampling rate always agree.
    target_sr = 16_000
    # librosa resamples to ``sr`` while loading; the second return value is
    # the resulting sampling rate, which is already known here.
    arr_audio, _ = librosa.load(file_, sr=target_sr)
    inputs = processor(arr_audio, sampling_rate=target_sr, return_tensors="pt", padding=True)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
    # Greedy CTC decoding: pick the most likely token id at every frame.
    pred_ids = torch.argmax(logits, dim=-1)
    # batch_decode yields one string per batch item; a single clip was passed,
    # so return that one string rather than the enclosing list.
    text = processor.batch_decode(pred_ids)
    return text[0]
# Web UI: a single audio input (passed to ``transcribe`` as a file path) and a
# plain-text output showing the transcription.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Kyrgyz-STT-Small",
    # The description names the model family that is actually loaded by this
    # app (a Wav2Vec2 XLSR checkpoint), not Whisper.
    description="Realtime demo for Kyrgyz speech recognition using a fine-tuned Wav2Vec2 XLSR model.",
)
# Start the Gradio server; this call blocks while the app is being served.
iface.launch()