Spaces:

alibabasglab
/

SpeechScore

Running

App Files Files Community

SpeechScore / basis.py

alibabasglab

Upload 73 files

936f6fa verified 4 months ago

raw

history blame

4.46 kB

	class ScoreBasis:
	    """Common base for the individual scores.

	    A concrete score overrides `windowed_scoring`; callers go through
	    `scoring`, which takes care of the sampling rate and of the optional
	    splitting of the signals into windows.
	    """

	    def __init__(self, name=None):
	        # Rate the score operates on; None means the rate of the incoming
	        # data is used unchanged.
	        self.score_rate = None
	        # Is the score intrusive (i.e. does it require a reference)?
	        self.intrusive = True
	        self.name = name

	    def windowed_scoring(self, audios, score_rate):
	        """Score one set of signals; must be overridden by each score.

	        Args:
	            audios: list of signals handed over by `scoring`.
	            score_rate: sampling rate of those signals.

	        Raises:
	            NotImplementedError: always, in this base class.
	        """
	        raise NotImplementedError(f'In {self.name}, windowed_scoring is not yet implemented')

	    def scoring(self, data, window=None, score_rate=None):
	        """Call `windowed_scoring`, on the whole signals or window by window.

	        Args:
	            data: mapping with the keys 'audio' (a list of signals, time
	                along axis 0) and 'rate' (the sampling rate of 'audio').
	            window: if not None, it is multiplied by the scoring rate to
	                get the length (and hop) in samples of the consecutive
	                windows that are scored separately.
	            score_rate: accepted for interface compatibility but ignored;
	                the scoring rate is `self.score_rate` when that is set and
	                `data['rate']` otherwise.

	        Returns:
	            The result of `windowed_scoring` when `window` is None,
	            otherwise a dict mapping each window index to the result of
	            `windowed_scoring` for that window.
	        """
	        source_rate = data['rate']
	        audios = data['audio']
	        score_rate = source_rate if self.score_rate is None else self.score_rate

	        if score_rate != source_rate:
	            # Imported only when needed so the plain path has no extra
	            # dependency.
	            import resampy

	            # A new list is built on purpose: overwriting data['audio']
	            # would leave it out of sync with data['rate'] for whoever
	            # uses the same `data` afterwards.
	            audios = [
	                resampy.resample(audio, source_rate, score_rate, axis=0)
	                for audio in audios
	            ]

	        if window is None:
	            return self.windowed_scoring(audios, score_rate)

	        from museval.metrics import Framing

	        # Total length to be framed: the longest signal, measured after
	        # any resampling.
	        maxlen = max(len(audio) for audio in audios)
	        framer = Framing(window * score_rate, window * score_rate, maxlen)
	        return {
	            t: self.windowed_scoring([audio[win] for audio in audios], score_rate)
	            for t, win in enumerate(framer)
	        }