Spaces:
Running
Running
File size: 8,494 Bytes
936f6fa 58f8c9b 936f6fa 58f8c9b 936f6fa 58f8c9b 936f6fa 58f8c9b 936f6fa c932300 936f6fa 25ce57e 09a4143 c932300 09a4143 7cec2b9 c932300 09a4143 4a76f9b 58f8c9b 6354b1b 25ce57e 58f8c9b 6354b1b 25ce57e 58f8c9b 6354b1b 4a76f9b 936f6fa 58f8c9b 936f6fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
import os
import librosa
import soundfile as sf
import resampy
import numpy as np
from scores.srmr.srmr import SRMR
from scores.dnsmos.dnsmos import DNSMOS
from scores.pesq import PESQ
from scores.nb_pesq import NB_PESQ
from scores.sisdr import SISDR
from scores.stoi import STOI
from scores.fwsegsnr import FWSEGSNR
from scores.lsd import LSD
from scores.bsseval import BSSEval
from scores.snr import SNR
from scores.ssnr import SSNR
from scores.llr import LLR
from scores.csig import CSIG
from scores.cbak import CBAK
from scores.covl import COVL
from scores.mcd import MCD
def compute_mean_results(*results, round_digits=None):
    """Average several (possibly nested) result dictionaries key by key.

    Parameters
    ----------
    *results : dict
        Dictionaries mapping a key to either a value that supports ``sum``
        and division, or to another dictionary with the same layout.
    round_digits : int, optional
        When given, every averaged value is passed through
        ``round(value, round_digits)``.

    Returns
    -------
    dict
        A dictionary holding, for each key, the mean over the inputs that
        contain that key. Nested dictionaries are averaged recursively.
        An empty dictionary is returned when no inputs are given.
    """
    mean_result = {}
    if not results:
        return mean_result

    # Use the union of keys (first-seen order): an input may legitimately
    # lack a key that other inputs carry, and must not abort the averaging.
    all_keys = dict.fromkeys(key for result in results for key in result)

    for key in all_keys:
        values = [result[key] for result in results if key in result]
        # If the value is a nested dictionary, recurse
        if isinstance(values[0], dict):
            mean_result[key] = compute_mean_results(*values, round_digits=round_digits)
            continue
        # Otherwise, compute the mean over the inputs that provide the key
        mean_value = sum(values) / len(values)
        if round_digits is not None:
            mean_value = round(mean_value, round_digits)
        mean_result[key] = mean_value
    return mean_result
class ScoresList:
    """Ordered collection of score objects that can be applied to audio.

    Each stored score object is expected to expose a ``name`` attribute and a
    ``scoring(data, window, score_rate, round_digits)`` method, where ``data``
    is a dict with the keys ``'audio'`` (list of arrays) and ``'rate'``.
    """

    def __init__(self):
        self.scores = []

    def __add__(self, score):
        """Append ``score`` in place and return this list.

        The list is mutated (rather than copied) so that ``lst += score``
        and ``lst + score`` both extend the same object.
        """
        self.scores.append(score)
        return self

    def __str__(self):
        return 'Scores: ' + ' '.join(x.name for x in self.scores)

    def __call__(self, test_path, reference_path, window=None, score_rate=None, return_mean=False, round_digits=None):
        """Score one audio input or every audio file in a directory.

        Parameters
        ----------
        test_path : str or tuple
            Path of an audio file, path of a directory of audio files, or a
            ``(sample_rate, audio)`` tuple holding already-loaded audio.
        reference_path : str or None
            Reference file (or directory with the same relative file names
            as ``test_path``). Ignored when ``test_path`` is a tuple.
        window : float
            the window length in seconds to use for scoring the files.
        score_rate :
            the sampling rate specified for scoring the files
            (16000 when not given).
        return_mean : bool
            When scoring a directory, also store the per-score mean over all
            files under the key ``'Mean_Score'``. It has no effect for a
            single file or tuple input, because there is nothing to average.
        round_digits : int or None
            Forwarded to every score and to the mean computation.

        Returns
        -------
        dict or None
            ``{score_name: result}`` for a single input, or
            ``{audio_id: {score_name: result}}`` for a directory. ``None``
            when ``test_path`` is ``None`` or the directory has no audio
            files; an empty dict when the path does not exist.
        """
        if score_rate is None:
            score_rate = 16000
        if test_path is None:
            print(f'Please provide audio path for test_path')
            return

        # In-memory audio: (sample_rate, samples)
        if isinstance(test_path, tuple):
            sr, audio = test_path
            if sr != score_rate:
                audio = resampy.resample(audio, sr, score_rate, axis=0)
            data = {'audio': [audio], 'rate': score_rate}
            return self._score_data(data, window, score_rate, round_digits)

        if os.path.isdir(test_path):
            audio_list = self.get_audio_list(test_path)
            if audio_list is None:
                return
            results = {}
            for audio_id in audio_list:
                test_file = os.path.join(test_path, audio_id)
                if reference_path is not None:
                    reference_file = os.path.join(reference_path, audio_id)
                else:
                    reference_file = None
                data = self.audio_reader(test_file, reference_file)
                results[audio_id] = self._score_data(data, window, score_rate, round_digits)
            # Only per-file result dicts can be averaged key by key, so the
            # mean is computed in this branch alone.
            if return_mean:
                results['Mean_Score'] = compute_mean_results(*results.values(), round_digits=round_digits)
            return results

        if os.path.isfile(test_path):
            data = self.audio_reader(test_path, reference_path)
            return self._score_data(data, window, score_rate, round_digits)

        print(f'{test_path} is not an existing file or directory, scoring ended!')
        return {}

    def _score_data(self, data, window, score_rate, round_digits):
        """Run every registered score on ``data``; skip scores returning None."""
        results = {}
        for score in self.scores:
            result_score = score.scoring(data, window, score_rate, round_digits)
            if result_score is not None:
                results[score.name] = result_score
        return results

    def get_audio_list(self, path):
        """Return the audio files under ``path`` as paths relative to it.

        '.wav' files are preferred; '.flac' files are only searched when no
        '.wav' file exists. Prints a message and returns ``None`` when
        neither is found.
        """
        # Find all '.wav' audio files in the given path
        path_list = librosa.util.find_files(path, ext="wav")
        # If no '.wav' files are found, try to find '.flac' audio files instead
        if not path_list:
            path_list = librosa.util.find_files(path, ext="flac")
        # If no audio files are found at all, print an error message and return None
        if not path_list:
            print(f'No audio files found in {path}, scoring ended!')
            return None
        # find_files searches sub-directories and returns absolute paths, so
        # keep the part relative to the search root. A bare file name would
        # point at the wrong location for files found in a sub-directory.
        root = os.path.abspath(os.path.expanduser(path))
        return [os.path.relpath(audio_path, root) for audio_path in path_list]

    @staticmethod
    def _read_mono(path):
        """Read ``path`` and return ``(audio, rate)`` with audio shaped (n, 1).

        Only the first channel of a multi-channel file is kept.
        """
        audio, rate = sf.read(path, always_2d=True)
        if audio.shape[1] > 1:
            audio = audio[..., 0, None]
        return audio, rate

    def audio_reader(self, test_path, reference_path):
        """Load the test (and optional reference) sound file for scoring.

        Multi-channel files are reduced to their first channel. When both
        files are given and their sampling rates differ, the one with the
        higher rate is resampled to the lower rate. All signals are
        zero-padded to the length of the longest one.

        Returns a dict with ``'audio'`` (list of 1-D arrays: test first,
        then reference if present) and ``'rate'`` (their sampling rate).
        """
        audio_test, rate_test = self._read_mono(test_path)
        rate = rate_test
        audios = [audio_test]

        if reference_path is not None:
            audio_ref, rate_ref = self._read_mono(reference_path)
            if rate_test != rate_ref:
                rate = min(rate_test, rate_ref)
                if rate_test != rate:
                    audio_test = resampy.resample(audio_test, rate_test, rate, axis=0)
                if rate_ref != rate:
                    audio_ref = resampy.resample(audio_ref, rate_ref, rate, axis=0)
            audios = [audio_test, audio_ref]

        maxlen = max(audio.shape[0] for audio in audios)

        # Flatten to 1-D and zero-pad the shorter signals
        padded = []
        for audio in audios:
            mono = audio[..., 0]
            if mono.shape[0] != maxlen:
                new = np.zeros((maxlen,))
                new[:mono.shape[0]] = mono
                mono = new
            padded.append(mono)

        return {'audio': padded, 'rate': rate}
def SpeechScore(scores=''):
    """ Load the desired scores inside a ScoresList object that can then
    be called to compute all the desired scores.

    Parameters:
    ----------
    scores: str or list of str
        name, or list of names, of the scores to load. Names are matched
        case-insensitively against:
            'srmr', 'pesq', 'nb_pesq', 'stoi', 'sisdr', 'fwsegsnr', 'lsd',
            'bsseval', 'dnsmos', 'snr', 'ssnr', 'llr', 'csig', 'cbak',
            'covl', 'mcd'
        For instance:
            * 'srmr' or ['srmr'] will only load SRMR
            * ['pesq', 'stoi'] will load PESQ and STOI
            * '' (the default) or [] will load no score
        A message is printed for every name that is not recognised, and
        that name is skipped.

    Returns:
    --------

    A ScoresList object, that can be run to get the desired scores
    """
    # A bare string must be treated as one name; iterating it directly
    # would walk over its characters and match nothing.
    if isinstance(scores, str):
        scores = [scores] if scores else []

    available = {
        'srmr': SRMR,
        'pesq': PESQ,
        'nb_pesq': NB_PESQ,
        'stoi': STOI,
        'sisdr': SISDR,
        'fwsegsnr': FWSEGSNR,
        'lsd': LSD,
        'bsseval': BSSEval,
        'dnsmos': DNSMOS,
        'snr': SNR,
        'ssnr': SSNR,
        'llr': LLR,
        'csig': CSIG,
        'cbak': CBAK,
        'covl': COVL,
        'mcd': MCD,
    }

    score_cls = ScoresList()
    for score in scores:
        score_type = available.get(score.lower())
        if score_type is None:
            print('score is pending implementation...')
            continue
        # Instantiate only the requested scores
        score_cls += score_type()
    return score_cls
|