# Hugging Face Space: alibabasglab/SpeechScore (status: Running)
# File size: 4,458 Bytes
# 936f6fa (presumably the short commit hash of this file revision)

class ScoreBasis:
    """Base class for audio scores.

    Subclasses implement `windowed_scoring`. `scoring` is the shared entry
    point: it brings the audio to the rate the score operates on and, when a
    window is requested, applies the score window by window.
    """

    def __init__(self, name=None):
        # Rate the score operates on; None means "use the rate of the data".
        self.score_rate = None
        # Is the score intrusive (requires a reference) or non-intrusive?
        self.intrusive = True
        self.name = name

    def windowed_scoring(self, audios, score_rate):
        """Score the given audios at `score_rate`; to be overridden.

        Args:
            audios: sequence of audio signals (for a windowed call, the
                corresponding slice of every signal).
            score_rate: sampling rate the audios are provided at.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(f'In {self.name}, windowed_scoring is not yet implemented')

    def scoring(self, data, window=None, score_rate=None):
        """Run `windowed_scoring` on `data`, resampling and windowing as needed.

        Args:
            data: mapping with an ``'audio'`` entry (sequence of signals, time
                on axis 0) and a ``'rate'`` entry (their sampling rate).
            window: optional window length; it is multiplied by the scoring
                rate to get a length in samples (so presumably seconds).
                The hop equals the window length.
            score_rate: accepted for interface compatibility but not used:
                the effective rate is ``self.score_rate`` when set, otherwise
                ``data['rate']``.

        Returns:
            The result of `windowed_scoring` on the full signals when
            `window` is None, otherwise a dict mapping each window index to
            the result of `windowed_scoring` on that window.
        """
        source_rate = data['rate']
        # The `score_rate` argument has always been overridden here; keep
        # that behaviour so existing callers get the same results.
        score_rate = source_rate if self.score_rate is None else self.score_rate

        audios = data['audio']
        if score_rate != source_rate:
            # Imported lazily: only needed when a rate conversion happens.
            import resampy

            # Build a new list rather than writing back into data['audio']:
            # data['rate'] is not updated, so mutating the shared input would
            # make any later use of `data` resample from the wrong rate.
            audios = [
                resampy.resample(audio, source_rate, score_rate, axis=0)
                for audio in audios
            ]

        if window is None:
            return self.windowed_scoring(audios, score_rate)

        # Imported lazily: only needed for windowed scoring.
        from museval.metrics import Framing

        # Framing needs the total length to know where the last window ends.
        maxlen = max((len(audio) for audio in audios), default=0)
        frame_len = window * score_rate
        framer = Framing(frame_len, frame_len, maxlen)
        return {
            t: self.windowed_scoring([audio[win] for audio in audios], score_rate)
            for t, win in enumerate(framer)
        }