Wave2Vec_Kyrgyz / app.py
Simonlob's picture
Update app.py
e864ca3 verified
raw
history blame
1.08 kB
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import numpy as np
import re
# Pick the compute device once at import time: CUDA when PyTorch reports a
# usable GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The processor and the CTC model are loaded from the same checkpoint name so
# that feature extraction and token decoding match the model weights.
processor = Wav2Vec2Processor.from_pretrained("the-cramer-project/Wav2vec-Kyrgyz")
model = Wav2Vec2ForCTC.from_pretrained("the-cramer-project/Wav2vec-Kyrgyz")

# Move the model weights to the selected device; inputs are moved to the same
# device at inference time.
model.to(device)
def transcribe(file_):
    """Transcribe a speech recording to text with the module-level model.

    Args:
        file_: Path to an audio file readable by ``librosa.load``. May be
            ``None`` or an empty string when no recording was supplied.

    Returns:
        The decoded transcription as a string, or an empty string when there
        is no audio to transcribe.
    """
    # The Gradio audio input hands over None when the form is submitted
    # without a recording; there is nothing to decode in that case.
    if not file_:
        return ""

    sample_rate = 16_000
    # librosa resamples to the requested rate while loading, so the rate it
    # returns is redundant and discarded.
    waveform, _ = librosa.load(file_, sr=sample_rate)
    if waveform.size == 0:
        return ""

    inputs = processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )
    input_values = inputs.input_values.to(device)

    # Attribute access on the processor output raises when the feature
    # extractor is configured not to emit an attention mask; look it up
    # defensively and let the model fall back to attention_mask=None.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values, attention_mask=attention_mask).logits

    # Greedy CTC decoding: most likely token per frame, then let the
    # processor turn the ids into text. A single clip was passed in, so the
    # batch has exactly one entry.
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
# Web UI: one audio input, delivered to `transcribe` as a file path, and one
# plain-text output showing the transcription.
iface = gr.Interface(
    transcribe,
    gr.Audio(type="filepath"),
    "text",
    title="Wave2Vec Kyrgyz",
    description="Realtime demo for Kyrgyz speech recognition using a wave2vec model.",
)

# Start serving the interface as soon as the module is executed.
iface.launch()