Spaces:
Running
Running
File size: 892 Bytes
936f6fa a7e2b53 936f6fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from basis import ScoreBasis
import numpy as np
import librosa
# Small positive constant used by cal_LSD: it is added to the estimate before
# squaring (the denominator) and to the ratio before log10, guarding against
# division by zero and log10(0).
EPS = 1e-12
class LSD(ScoreBasis):
    """Score named 'LSD', computed from the spectrograms of two signals.

    NOTE(review): the meaning of the ``intrusive`` and ``mono`` flags is
    defined by ``ScoreBasis``, which is not visible here -- confirm there.
    """

    def __init__(self):
        super().__init__(name='LSD')
        self.intrusive = False
        self.mono = True

    def windowed_scoring(self, audios, score_rate):
        """Score one window of audio.

        Args:
            audios: Sequence of waveforms. Exactly two are required;
                ``audios[0]`` is used as the target and ``audios[1]`` as
                the estimate.
            score_rate: Sampling rate forwarded to ``wav_to_spectrogram``.

        Returns:
            The value of ``cal_LSD(estimate, target)``, or ``None`` when
            ``audios`` does not contain exactly two items.
        """
        if len(audios) == 2:
            estimate_spec = wav_to_spectrogram(audios[1], score_rate)
            target_spec = wav_to_spectrogram(audios[0], score_rate)
            return cal_LSD(estimate_spec, target_spec)
        return None
def wav_to_spectrogram(wav, rate):
    """Return the magnitude STFT of ``wav`` with its two axes swapped.

    Args:
        wav: Waveform passed straight to ``librosa.stft``.
        rate: Sampling rate of ``wav``; it sets both STFT parameters.

    Returns:
        ``abs(stft)`` transposed with axes ``(1, 0)``.
        NOTE(review): librosa documents the STFT output as
        (freq_bins, frames), so this is presumably (frames, freq_bins) --
        confirm against the installed librosa version.
    """
    # One hop per hundredth of `rate` samples (10 ms if rate is in Hz).
    hop = int(rate / 100)
    # FFT size is 2048 at 48000 and scales proportionally with the rate.
    fft_size = int(2048 / (48000 / rate))
    magnitude = np.abs(librosa.stft(wav, hop_length=hop, n_fft=fft_size))
    return magnitude.transpose(1, 0)
def cal_LSD(est, target):
    """Compute the log-spectral distance between two spectrograms.

    Args:
        est: Estimated spectrogram (2-D array).
        target: Target spectrogram, broadcast-compatible with ``est``.
            NOTE(review): both are presumably (frames, freq_bins)
            magnitudes from ``wav_to_spectrogram`` -- confirm with callers.

    Returns:
        The mean over axis 0 of the square root of the mean over axis 1 of
        the squared ``log10`` power ratio ``target**2 / est**2``.
    """
    # EPS keeps the denominator away from zero; it is added again below so
    # that log10 never receives an exact zero.
    power_ratio = target**2 / ((est + EPS) ** 2)
    squared_log = np.log10(power_ratio + EPS) ** 2
    per_row_distance = np.mean(squared_log, axis=1) ** 0.5
    return np.mean(per_row_distance, axis=0)
|