File size: 8,494 Bytes
936f6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58f8c9b
936f6fa
 
 
 
 
 
 
58f8c9b
936f6fa
 
58f8c9b
 
 
 
936f6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
58f8c9b
936f6fa
 
 
 
 
 
c932300
 
936f6fa
 
 
 
25ce57e
09a4143
c932300
 
09a4143
7cec2b9
c932300
09a4143
4a76f9b
58f8c9b
6354b1b
 
25ce57e
 
 
 
 
 
 
 
 
 
 
58f8c9b
6354b1b
 
25ce57e
 
 
 
 
58f8c9b
6354b1b
 
4a76f9b
936f6fa
58f8c9b
936f6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import os
import librosa
import soundfile as sf
import resampy
import numpy as np
from scores.srmr.srmr import SRMR
from scores.dnsmos.dnsmos import DNSMOS
from scores.pesq import PESQ
from scores.nb_pesq import NB_PESQ
from scores.sisdr import SISDR
from scores.stoi import STOI
from scores.fwsegsnr import FWSEGSNR
from scores.lsd import LSD
from scores.bsseval import BSSEval
from scores.snr import SNR
from scores.ssnr import SSNR
from scores.llr import LLR
from scores.csig import CSIG
from scores.cbak import CBAK
from scores.covl import COVL
from scores.mcd import MCD

def compute_mean_results(*results, round_digits=None):
    """Average several result dictionaries key by key.

    Parameters:
    ----------
    *results: dict
        Result dictionaries sharing the same layout. Values are either
        numbers or nested dictionaries, which are averaged recursively.
    round_digits: int or None
        When given, every averaged number is rounded to this many digits.

    Returns:
    --------
    A dictionary with the same layout as the inputs holding the mean of
    each entry. An empty dictionary is returned when no input is given.
    A key that is missing from some inputs is averaged over the inputs
    that do contain it, so one incomplete result does not abort the mean.
    """
    if not results:
        return {}

    # Gather keys from every input (first-seen order), not only the first
    # one, so an entry missing from the first result is still reported.
    keys = []
    seen = set()
    for result in results:
        for key in result:
            if key not in seen:
                seen.add(key)
                keys.append(key)

    mean_result = {}
    for key in keys:
        values = [result[key] for result in results if key in result]
        if isinstance(values[0], dict):
            # Nested dictionary: recurse with the same rounding setting
            mean_result[key] = compute_mean_results(*values, round_digits=round_digits)
        else:
            mean = sum(values) / len(values)
            mean_result[key] = mean if round_digits is None else round(mean, round_digits)

    return mean_result

class ScoresList:
    """Ordered collection of score objects that is run on audio inputs.

    Every registered score is used through its ``name`` attribute and its
    ``scoring(data, window, score_rate, round_digits)`` method.
    """

    def __init__(self):
        self.scores = []

    def __add__(self, score):
        """Append ``score`` in place and return this object (enables ``+=``)."""
        self.scores.append(score)
        return self

    def __str__(self):
        return 'Scores: ' + ' '.join(x.name for x in self.scores)

    def __call__(self, test_path, reference_path, window=None, score_rate=None, return_mean=False, round_digits=None):
        """Run every registered score on the given input.

        Parameters:
        ----------
        test_path: str or tuple
            A path to an audio file, a path to a directory of audio files,
            or a ``(sample_rate, audio)`` tuple.
        reference_path: str or None
            Reference file (or directory holding files with the same
            relative names as in ``test_path``). Not used for tuple input.
        window: float
            the window length in seconds to use for scoring the files.
        score_rate:
            the sampling rate specified for scoring the files
            (16000 when not given).
        return_mean: bool
            When True a ``'Mean_Score'`` entry is added. For a directory it
            is the mean over all files; for a single input it is the mean
            over that one input, so the layout is the same in both cases.
        round_digits: int or None
            Forwarded to the scores and to the mean computation.

        Returns:
        --------
        For a directory: ``{file_name: {score_name: result}}``; otherwise
        ``{score_name: result}``. ``None`` when ``test_path`` is None or the
        directory holds no audio files; an empty dict when ``test_path`` is
        neither an existing file nor a directory.
        """
        if score_rate is None:
            score_rate = 16000
        if test_path is None:
            print('Please provide audio path for test_path')
            return

        results = {}
        per_file = False  # True when `results` is keyed by file name
        if isinstance(test_path, tuple):
            sr, audio = test_path
            if sr != score_rate:
                audio = resampy.resample(audio, sr, score_rate, axis=0)
            data = {'audio': [audio], 'rate': score_rate}
            results = self._run_scores(data, window, score_rate, round_digits)
        elif os.path.isdir(test_path):
            audio_list = self.get_audio_list(test_path)
            if audio_list is None:
                return
            per_file = True
            for audio_id in audio_list:
                reference_file = None
                if reference_path is not None:
                    reference_file = os.path.join(reference_path, audio_id)
                data = self.audio_reader(os.path.join(test_path, audio_id), reference_file)
                results[audio_id] = self._run_scores(data, window, score_rate, round_digits)
        elif os.path.isfile(test_path):
            data = self.audio_reader(test_path, reference_path)
            results = self._run_scores(data, window, score_rate, round_digits)
        else:
            # Nothing to score: report it instead of silently returning and
            # skip the mean, which has no input to average.
            print(f'{test_path} is neither an audio file nor a directory, scoring ended!')
            return results

        if return_mean:
            # The mean helper expects one dictionary per scored item. A
            # single input is exactly one item; spreading its score values
            # (numbers) into the helper would fail.
            per_item = list(results.values()) if per_file else [results]
            results['Mean_Score'] = compute_mean_results(*per_item, round_digits=round_digits)

        return results

    def _run_scores(self, data, window, score_rate, round_digits):
        """Apply each registered score to ``data``; scores returning None are omitted."""
        outcome = {}
        for score in self.scores:
            result_score = score.scoring(data, window, score_rate, round_digits)
            if result_score is not None:
                outcome[score.name] = result_score
        return outcome

    def get_audio_list(self, path):
        """List the audio files found under ``path``, relative to ``path``.

        '.wav' files are searched first; '.flac' files are used only when no
        '.wav' file exists. Returns None (after printing a message) when
        neither is found.
        """
        path_list = librosa.util.find_files(path, ext="wav")

        # If no '.wav' files are found, try to find '.flac' audio files instead
        if not path_list:
            path_list = librosa.util.find_files(path, ext="flac")

        if not path_list:
            print(f'No audio files found in {path}, scoring ended!')
            return None

        # Keep the path relative to `path` rather than only the base name:
        # librosa's find_files searches sub-directories by default, and the
        # caller joins these names back onto the test/reference directories.
        # For files directly inside `path` this is just the file name.
        return [os.path.relpath(audio_path, path) for audio_path in path_list]

    def audio_reader(self, test_path, reference_path):
        """Load the test (and optional reference) file as equal-length mono signals.

        Only the first channel of each file is kept. When both files are
        given and their sampling rates differ, the one with the higher rate
        is resampled to the lower rate. Shorter signals are zero-padded at
        the end to the length of the longest one.

        Returns:
        --------
        dict with ``'audio'`` (list of 1-D arrays: test first, then the
        reference if any) and ``'rate'`` (their common sampling rate).
        """
        audio_test, rate_test = sf.read(test_path, always_2d=True)
        if audio_test.shape[1] > 1:
            audio_test = audio_test[..., 0, None]

        rate = rate_test
        audios = [audio_test]
        if reference_path is not None:
            audio_ref, rate_ref = sf.read(reference_path, always_2d=True)
            if audio_ref.shape[1] > 1:
                audio_ref = audio_ref[..., 0, None]
            # Work at the lower of the two rates
            rate = min(rate_test, rate_ref)
            if rate_test != rate:
                audios[0] = resampy.resample(audio_test, rate_test, rate, axis=0)
            if rate_ref != rate:
                audio_ref = resampy.resample(audio_ref, rate_ref, rate, axis=0)
            audios.append(audio_ref)

        maxlen = max(audio.shape[0] for audio in audios)
        for index, audio in enumerate(audios):
            mono = audio[..., 0]
            if mono.shape[0] != maxlen:
                # zero-pad at the end up to the longest signal
                padded = np.zeros((maxlen,))
                padded[:mono.shape[0]] = mono
                mono = padded
            audios[index] = mono

        return {'audio': audios, 'rate': rate}

def SpeechScore(scores=''):
    """ Load the desired scores inside a ScoresList object that can then
    be called to compute all the desired scores.

    Parameters:
    ----------
    scores: str or list of str
        name(s) of the scores to load, matched case-insensitively against:
        'srmr', 'pesq', 'nb_pesq', 'stoi', 'sisdr', 'fwsegsnr', 'lsd',
        'bsseval', 'dnsmos', 'snr', 'ssnr', 'llr', 'csig', 'cbak', 'covl',
        'mcd'. A single string is treated as one score name. An empty
        string (the default) or empty list loads no score. For every name
        that is not recognized a message is printed and the name is skipped.

    Returns:
    --------

    A ScoresList object, that can be run to get the desired scores
    """
    # A bare string is ONE score name; iterating over it directly would
    # walk its characters and never match anything.
    if isinstance(scores, str):
        scores = [scores] if scores else []

    available = {
        'srmr': SRMR,
        'pesq': PESQ,
        'nb_pesq': NB_PESQ,
        'stoi': STOI,
        'sisdr': SISDR,
        'fwsegsnr': FWSEGSNR,
        'lsd': LSD,
        'bsseval': BSSEval,
        'dnsmos': DNSMOS,
        'snr': SNR,
        'ssnr': SSNR,
        'llr': LLR,
        'csig': CSIG,
        'cbak': CBAK,
        'covl': COVL,
        'mcd': MCD,
    }

    score_cls = ScoresList()
    for score in scores:
        score_type = available.get(score.lower())
        if score_type is None:
            print('score is pending implementation...')
        else:
            # Instantiate only the requested scores
            score_cls += score_type()
    return score_cls