File size: 892 Bytes
936f6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
a7e2b53
936f6fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from basis import ScoreBasis
import numpy as np
import librosa

# Small constant used by cal_LSD: added to the estimate before dividing and to
# the power ratio before log10, so neither a zero divisor nor log10(0) occurs.
EPS = 1e-12

class LSD(ScoreBasis):
    """Log-spectral distance (LSD) score computed from a pair of waveforms.

    ``audios[0]`` is treated as the target signal and ``audios[1]`` as the
    estimate being scored.
    """

    def __init__(self):
        super(LSD, self).__init__(name='LSD')
        # NOTE(review): this score needs two signals, yet `intrusive` is False.
        # The flag's meaning is defined in ScoreBasis -- confirm this is intended.
        self.intrusive = False
        self.mono = True

    def windowed_scoring(self, audios, score_rate):
        """Score one window of audio.

        Args:
            audios: Sequence of waveforms; index 0 is the target and index 1
                is the estimate.
            score_rate: Sample rate forwarded to the spectrogram computation.

        Returns:
            The value produced by ``cal_LSD``, or ``None`` when ``audios``
            does not hold exactly two entries.
        """
        if len(audios) == 2:
            estimate_spec = wav_to_spectrogram(audios[1], score_rate)
            target_spec = wav_to_spectrogram(audios[0], score_rate)
            return cal_LSD(estimate_spec, target_spec)
        return None

def wav_to_spectrogram(wav, rate):
    """Compute the magnitude STFT of ``wav`` with its two axes swapped.

    The hop is ``rate / 100`` samples (10 ms when ``rate`` is in Hz) and the
    FFT size is 2048 scaled by ``rate / 48000``; both are truncated to ints.

    Args:
        wav: Waveform handed to ``librosa.stft``. Presumably a 1-D array; the
            two-axis transpose below requires a 2-D STFT result.
        rate: Sample rate of ``wav``.

    Returns:
        ``abs(stft)`` with its two axes swapped. Given librosa's
        (frequency bins, frames) layout this is (frames, frequency bins).
    """
    frame_shift = int(rate / 100)
    # 2048 points at 48 kHz, shrunk or grown in proportion to the actual rate.
    fft_size = int(2048 / (48000 / rate))
    magnitude = np.abs(
        librosa.stft(wav, n_fft=fft_size, hop_length=frame_shift)
    )
    return magnitude.transpose(1, 0)

def cal_LSD(est, target):
    """Compute the log-spectral distance between two magnitude spectrograms.

    For every row (axis 0) the squared log10 power ratio is averaged along
    axis 1 and square-rooted; the per-row values are then averaged along
    axis 0. When the inputs come from ``wav_to_spectrogram`` the rows are
    presumably time frames and the columns frequency bins.

    Args:
        est: Estimated spectrogram, at least 2-D.
        target: Target spectrogram, broadcast-compatible with ``est``.

    Returns:
        The mean over axis 0 of the per-row root-mean-square log difference.
    """
    # EPS keeps the divisor non-zero and the log10 argument strictly positive.
    power_ratio = target**2 / ((est + EPS) ** 2)
    squared_log_diff = np.log10(power_ratio + EPS) ** 2
    per_row_distance = np.mean(squared_log_diff, axis=1) ** 0.5
    return np.mean(per_row_distance, axis=0)