import gradio as gr
import torch
import torchaudio
from torchaudio.transforms import Resample
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Load the pretrained Nepali wav2vec2 model and its processor once at startup,
# so they are not re-loaded on every request.
processor = Wav2Vec2Processor.from_pretrained("gagan3012/wav2vec2-xlsr-nepali")
model = Wav2Vec2ForCTC.from_pretrained("gagan3012/wav2vec2-xlsr-nepali")
def transcribe_audio(audio_file):
    # torchaudio returns a (channels, samples) tensor plus the file's native sample rate.
    input_arr, sampling_rate = torchaudio.load(audio_file)
    # Downmix multi-channel recordings to mono, then resample to the 16 kHz the model expects.
    input_arr = input_arr.mean(dim=0, keepdim=True)
    resampler = Resample(orig_freq=sampling_rate, new_freq=16000)
    input_arr = resampler(input_arr).squeeze().numpy()
    inputs = processor(input_arr, sampling_rate=16_000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
    # Greedy CTC decoding: take the most likely token at each frame; batch_decode then
    # collapses repeats and strips blanks to produce the final transcription.
    predicted_ids = torch.argmax(logits, dim=-1)
    predicted_words = processor.batch_decode(predicted_ids)
    return predicted_words[0]
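
# Optional local smoke test, separate from the web UI. The file name below is a
# hypothetical placeholder; point it at any real audio file to try the pipeline:
#
#     print(transcribe_audio("sample_nepali.wav"))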
# The gr.inputs namespace was removed in newer Gradio releases; components are now
# constructed directly, with `sources` listing the allowed input methods.
audio_input = gr.Audio(sources=["upload"], type="filepath")
iface = gr.Interface(fn=transcribe_audio, inputs=audio_input,
                     outputs=["textbox"], title="Speech To Text",
                     description="Upload an audio file and hit the 'Submit' button")
iface.launch(inline=False)
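
# For this file to run as app.py on a Hugging Face Space, a requirements.txt must
# sit alongside it. A minimal sketch, assuming unpinned versions are acceptable:
#
#     gradio
#     torch
#     torchaudio
#     transformers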