Spaces:

ahmedJaafari
/

Annarabic

Runtime error

App Files Files

Annarabic / app.py

ahmedJaafari

Update app.py

c03e9ab almost 3 years ago

raw

history blame

2.46 kB

	import gradio as gr
	import numpy as np
	from transformers.file_utils import cached_path, hf_bucket_url
	import os
	from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC
	from datasets import load_dataset
	import torch
	import kenlm
	import torchaudio

	# Load the processor and the CTC model from the same Hub repository.
	# Both calls share the local cache directory and the access token read
	# from the "AnnarabicToken" environment variable.
	cache_dir = './cache/'
	_hub_kwargs = {
	    "cache_dir": cache_dir,
	    "use_auth_token": os.getenv("AnnarabicToken"),
	}
	processor = Wav2Vec2ProcessorWithLM.from_pretrained("ahmedJaafari/Annarabic3.2", **_hub_kwargs)
	model = AutoModelForCTC.from_pretrained("ahmedJaafari/Annarabic3.2", **_hub_kwargs)

	# Read a sound file into an array at the sampling rate the model expects.
	def speech_file_to_array_fn(path, max_seconds=120, target_sr=16000):
	    """Load an audio file, resample it, and keep only its beginning.

	    Args:
	        path: Location of the audio file; passed to ``torchaudio.load``.
	        max_seconds: Maximum duration to keep, counted from the start of
	            the clip. ``None`` or a non-positive value keeps everything.
	            Fractional values are accepted.
	        target_sr: Sampling rate, in Hz, of the returned audio.

	    Returns:
	        A dict with the keys ``"file"`` (the input path), ``"speech"``
	        (a NumPy array) and ``"sampling_rate"`` (equal to ``target_sr``).
	    """
	    speech_array, sampling_rate = torchaudio.load(path)
	    if sampling_rate != target_sr:
	        resample = torchaudio.transforms.Resample(
	            orig_freq=sampling_rate,
	            new_freq=target_sr,
	        )
	        speech_array = resample(speech_array)
	    # Keep only the first row of the loaded tensor (the first channel,
	    # given torchaudio's (channel, time) layout); other rows are dropped.
	    speech_array = speech_array[0]
	    if max_seconds is not None and max_seconds > 0:
	        # Slice bounds must be integers, so a fractional limit is truncated.
	        speech_array = speech_array[:int(max_seconds * target_sr)]
	    return {
	        "file": path,
	        "speech": speech_array.numpy(),
	        "sampling_rate": target_sr,
	    }

	# Transcribe one audio file with the CTC model and the processor's decoder.
	def inference(audio):
	    """Return the transcription of the audio file at ``audio``.

	    Args:
	        audio: Path to the audio file to transcribe. ``None`` or an empty
	            path (nothing was supplied) yields an empty transcription.

	    Returns:
	        The decoded text; it is also printed to standard output.
	    """
	    if not audio:
	        # Nothing to transcribe; avoid failing inside the audio loader.
	        return ""
	    # Read the sound file into an array plus its sampling rate.
	    ds = speech_file_to_array_fn(audio)
	    # Turn the raw waveform into model input tensors.
	    input_values = processor(
	        ds["speech"],
	        sampling_rate=ds["sampling_rate"],
	        return_tensors="pt",
	    ).input_values
	    # Inference only, so gradient tracking is switched off.
	    with torch.no_grad():
	        logits = model(input_values).logits
	    # Decode the CTC output of the first (and only) item in the batch.
	    output = processor.decode(logits.numpy()[0]).text
	    print(output)
	    return output

	# Build and launch the Gradio demo.
	# The ``gr.inputs`` / ``gr.outputs`` namespaces were deprecated in Gradio 3
	# and removed in later releases, so prefer the top-level components and
	# fall back to the legacy namespaces only on older installations.
	if hasattr(gr, "Audio"):
	    inputs = gr.Audio(label="Input Audio", type="filepath")
	    outputs = gr.Textbox(label="Output Text")
	else:
	    inputs = gr.inputs.Audio(label="Input Audio", type="filepath")
	    outputs = gr.outputs.Textbox(label="Output Text")
	title = "Annarabic Speech Recognition System"
	# NOTE(review): the text below promises a 10-second limit, while
	# speech_file_to_array_fn defaults to max_seconds=120 - confirm which one
	# is intended before changing either.
	description = 'Demo for <b>Annarabic ASR</b>. To use it, simply upload your audio, or click on one of the examples to load them. Only the 10 first seconds of the audio will be transcribed and GPU runtime is not used. For more information, contact Ahmed Jaafari via email: <a href = "mailto: a.jaafari@aui.ma">a.jaafari@aui.ma</a> or phone: <a href = "tel: +212658537105">+212658537105</a>.'
	# Example clips are referenced by file name (relative paths).
	examples = [['Aya.mp3'], ['Loubna.mp3'], ['Omar.wav'], ['Yassir.wav']]
	article = "* The ASR never trained on the given examples."
	gr.Interface(
	    inference,
	    inputs,
	    outputs,
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	).launch()