from typing import Dict

import numpy as np
from transformers import (
    AutomaticSpeechRecognitionPipeline,
    AutoTokenizer,
    Wav2Vec2FeatureExtractor,
    Wav2Vec2ForCTC,
)


class PreTrainedModel:
    def __init__(self, path: str):
        """
        Loads the model, tokenizer, and feature extractor from a local
        directory and assembles them into an ASR pipeline.
        """
        model = Wav2Vec2ForCTC.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path)
        extractor = Wav2Vec2FeatureExtractor.from_pretrained(path)

        self.model = AutomaticSpeechRecognitionPipeline(
            model=model, feature_extractor=extractor, tokenizer=tokenizer
        )

    def __call__(self, inputs: np.ndarray) -> Dict[str, str]:
        """
        Args:
            inputs (:obj:`np.ndarray`):
                The raw waveform of the received audio, sampled at 16 kHz by default.
        Return:
            A :obj:`dict` of the form {"text": "XXX"} containing the text
            detected in the input audio.
        """
        return self.model(inputs)


""" |
|
# Just an example using this. |
|
import subprocess |
|
from datasets import load_dataset |
|
|
|
def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.array: |
|
ar = f"{sampling_rate}" |
|
ac = "1" |
|
format_for_conversion = "f32le" |
|
ffmpeg_command = [ |
|
"ffmpeg", |
|
"-i", |
|
"pipe:0", |
|
"-ac", |
|
ac, |
|
"-ar", |
|
ar, |
|
"-f", |
|
format_for_conversion, |
|
"-hide_banner", |
|
"-loglevel", |
|
"quiet", |
|
"pipe:1", |
|
] |
|
|
|
ffmpeg_process = subprocess.Popen( |
|
ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE |
|
) |
|
output_stream = ffmpeg_process.communicate(bpayload) |
|
out_bytes = output_stream[0] |
|
|
|
audio = np.frombuffer(out_bytes, np.float32).copy() |
|
if audio.shape[0] == 0: |
|
raise ValueError("Malformed soundfile") |
|
return audio |
|
|
|
model = PreTrainedModel() |
|
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation") |
|
filename = ds[0]["file"] |
|
with open(filename, "rb") as f: |
|
data = ffmpeg_read(f.read(), 16000) |
|
print(model(data)) |
|
""" |