# Spaces: Runtime error (page-header text, not part of the program)
import gradio as gr | |
import numpy as np | |
from transformers.file_utils import cached_path, hf_bucket_url | |
import os | |
from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC | |
from datasets import load_dataset | |
import torch | |
import kenlm | |
import torchaudio | |
# Local directory where files downloaded from the Hub are cached.
cache_dir = './cache/'

# The processor and the CTC model are loaded from the same checkpoint with the
# same Hub options, so both are spelled out once and shared.
_CHECKPOINT = "ahmedJaafari/Annarabic3.2"
_HUB_KWARGS = {
    "cache_dir": cache_dir,
    # Hub token is read from the "AnnarabicToken" environment variable.
    "use_auth_token": os.getenv("AnnarabicToken"),
}

processor = Wav2Vec2ProcessorWithLM.from_pretrained(_CHECKPOINT, **_HUB_KWARGS)
model = AutoModelForCTC.from_pretrained(_CHECKPOINT, **_HUB_KWARGS)
def speech_file_to_array_fn(path, max_seconds=120, target_sampling_rate=16000):
    """Load an audio file as a single-channel waveform for the ASR model.

    The file is read with ``torchaudio.load``, reduced to its first channel,
    resampled to ``target_sampling_rate`` when needed, and optionally
    truncated to the first ``max_seconds`` seconds.

    Args:
        path: Path of the audio file to read.
        max_seconds: Maximum duration to keep, in seconds (int or float).
            ``None`` or a non-positive value keeps the whole recording.
        target_sampling_rate: Sampling rate, in Hz, of the returned waveform.

    Returns:
        A dict with the keys ``"file"`` (the input path), ``"speech"`` (the
        waveform as a NumPy array) and ``"sampling_rate"`` (the target rate).
    """
    speech_array, sampling_rate = torchaudio.load(path)

    # Only the first channel is used downstream. Drop the others before
    # resampling (keeping the channel axis) so no work is spent on them.
    speech_array = speech_array[:1]
    if sampling_rate != target_sampling_rate:
        resample = torchaudio.transforms.Resample(
            orig_freq=sampling_rate,
            new_freq=target_sampling_rate,
        )
        speech_array = resample(speech_array)
    speech_array = speech_array[0]

    if max_seconds is not None and max_seconds > 0:
        # int() keeps the slice bound valid when max_seconds is fractional.
        max_samples = int(max_seconds * target_sampling_rate)
        speech_array = speech_array[:max_samples]

    return {
        "file": path,
        "speech": speech_array.numpy(),
        "sampling_rate": target_sampling_rate,
    }
def inference(audio):
    """Transcribe one audio file with the CTC model and its LM-backed decoder.

    Args:
        audio: Path of the audio file to transcribe. ``None`` or an empty
            value (e.g. the form was submitted without audio) is accepted.

    Returns:
        The decoded transcription text, or an empty string when no audio was
        provided.
    """
    # Nothing to transcribe: return early instead of failing in the loader.
    if not audio:
        return ""

    # Read the file and bring it to the sampling rate the model expects.
    sample = speech_file_to_array_fn(audio)

    # Turn the raw waveform into model input features.
    input_values = processor(
        sample["speech"],
        sampling_rate=sample["sampling_rate"],
        return_tensors="pt",
    ).input_values

    # Forward pass only; gradients are not needed for inference.
    with torch.no_grad():
        logits = model(input_values).logits

    # Decode the single item of the batch; .cpu() keeps the NumPy conversion
    # valid even if the model lives on another device.
    output = processor.decode(logits.cpu().numpy()[0]).text
    print(output)
    return output
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and removed
# in Gradio 4. Prefer the top-level components and fall back to the legacy
# namespaces only on releases that do not provide them.
if hasattr(gr, "Audio") and hasattr(gr, "Textbox"):
    inputs = gr.Audio(label="Input Audio", type="filepath")
    outputs = gr.Textbox(label="Output Text")
else:
    inputs = gr.inputs.Audio(label="Input Audio", type="filepath")
    outputs = gr.outputs.Textbox(label="Output Text")

title = "Annarabic Speech Recognition System"
# NOTE(review): the text below says only the first 10 seconds are transcribed,
# while speech_file_to_array_fn defaults to max_seconds=120 — confirm which
# limit is intended.
description = 'Demo for <b>Annarabic ASR</b>. To use it, simply upload your audio, or click on one of the examples to load them. Only the 10 first seconds of the audio will be transcribed and GPU runtime is not used. For more information, contact Ahmed Jaafari via email: <a href = "mailto: a.jaafari@aui.ma">a.jaafari@aui.ma</a> or phone: <a href = "tel: +212658537105">+212658537105</a>.'
# Example audio files offered in the UI, one input per row.
examples = [['Aya.mp3'], ['Loubna.mp3'], ['Omar.wav'], ['Yassir.wav']]
article = "* The ASR never trained on the given examples."

# Build the demo around `inference` and start serving it.
gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch()