File size: 2,894 Bytes
e982ae0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""Create spectrograms from audio files using matplotlib"""
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple, Union

import matplotlib as mpl
mpl.use('Agg')
import librosa
import librosa.display

import matplotlib.pyplot as plt
import numpy as np
import scipy.signal as signal

# Configure the root logger as soon as this module is imported:
# every record is prefixed with a timestamp and INFO is the minimum level.
logging.basicConfig(format='%(asctime)s: %(message)s', level=logging.INFO)


@dataclass
class FFTConfig:
    """Bundle of options consumed by `calc_stft` and `plot_spec`.

    Field order is part of the interface (the dataclass accepts positional
    arguments), so new fields should only ever be appended.
    """

    # FFT size handed to librosa.stft.
    n_fft: Union[int, None] = 4096
    # Window length handed to librosa.stft; None defers to librosa's default.
    win_length: Union[int, None] = None
    # Hop between frames; shared by the STFT, PCEN and the plot.
    hop_length: int = 512
    # Sampling rate in Hz. plot_spec works with the rate delivered alongside
    # the audio rather than relying on this default.
    sr: int = 22050
    # Convert the spectrogram to decibels before plotting
    # (also implied whenever `pcen` or `mel` is enabled).
    db: bool = False
    # Plot a mel spectrogram instead of the plain STFT magnitude.
    mel: bool = False
    # Frequency bounds passed to the mel filter bank and to specshow.
    fmin: int = 50
    fmax: int = 10000
    # `y_axis` argument of librosa.display.specshow.
    y_axis: str = 'linear'
    # NOTE(review): not read by any function visible in this file.
    denoise: Union[str, None] = None
    # Apply librosa.pcen to the STFT magnitude.
    pcen: bool = False
    # Matplotlib colormap name used by specshow.
    cmap: str = 'magma'
    # Number of mel bands when `mel` is enabled.
    n_mels: int = 128
    # Colour scale limits forwarded to specshow; None lets it choose.
    vmin: Union[float, None] = None
    vmax: Union[float, None] = None
    # Run the audio through `fish_filter` before analysis.
    bandpass: bool = True
    # y-limits applied to the axes after drawing; None leaves them untouched.
    ylim: Union[Tuple[float, float], None] = (0, 512)

def load_wav(fpath):
    """Load an audio file and strip it with `librosa.effects.trim`.

    Args:
        fpath: Location of the audio file, forwarded unchanged to
            `librosa.load` (which is called with its default settings).

    Returns:
        A `(audio, sr)` tuple: the trimmed samples followed by the sampling
        rate reported by `librosa.load`.
    """
    samples, sample_rate = librosa.load(fpath)
    # trim() returns (trimmed_signal, interval); only the signal is needed.
    trimmed = librosa.effects.trim(samples)[0]
    return trimmed, sample_rate


def calc_stft(audio, fft_config):
    """Compute the magnitude of the short-time Fourier transform of `audio`.

    Args:
        audio: Samples handed to `librosa.stft`.
        fft_config: Object exposing `n_fft`, `hop_length` and `win_length`
            (e.g. an `FFTConfig`).

    Returns:
        The element-wise absolute value of the STFT returned by librosa.
    """
    stft_options = {
        'n_fft': fft_config.n_fft,
        'hop_length': fft_config.hop_length,
        'win_length': fft_config.win_length,
    }
    complex_spec = librosa.stft(audio, **stft_options)
    magnitude = np.abs(complex_spec)
    return magnitude


def _spectrogram_for_display(audio, sr, fft_config: FFTConfig):
    """Build the 2-D array that `plot_spec` draws, honouring mel / pcen / db."""
    if fft_config.mel:
        # The mel spectrogram is computed straight from the waveform and takes
        # precedence over the STFT/PCEN path, so that path is skipped entirely.
        # Use the configured framing so the result agrees with the hop_length
        # that is later given to specshow.
        mel_power = librosa.feature.melspectrogram(
            y=audio,
            sr=sr,
            n_fft=fft_config.n_fft,
            hop_length=fft_config.hop_length,
            win_length=fft_config.win_length,
            n_mels=fft_config.n_mels,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax,
        )
        # melspectrogram returns a *power* spectrogram by default, so the
        # matching decibel conversion is power_to_db (not amplitude_to_db).
        return librosa.power_to_db(mel_power, ref=np.max)

    spec = calc_stft(audio, fft_config)
    # Track the dB decision locally instead of writing it back into the
    # caller's config object.
    to_db = fft_config.db

    if fft_config.pcen:
        # Scale PCEN: https://librosa.org/doc/latest/generated/librosa.pcen.html?highlight=pcen#librosa.pcen
        spec = librosa.pcen(spec * (2**31), sr=sr, hop_length=fft_config.hop_length)
        to_db = True

    if to_db:
        spec = librosa.amplitude_to_db(spec, ref=np.max)
    return spec


def plot_spec(inp, output, fft_config: FFTConfig):
    """Render a spectrogram of `inp` and optionally save it as an image.

    Args:
        inp: `(sr, audio)` pair. Note the order: sampling rate first, samples
            second, i.e. the reverse of what `librosa.load` returns.
        output: Destination handed to `Figure.savefig`. When falsy, the
            figure is drawn and discarded without being written anywhere.
        fft_config: Analysis and plotting options. The object is only read;
            it is never modified, so one instance can safely be reused
            across calls.

    Returns:
        None.
    """
    # Audio returns sr and audio! (opposite of librosa)
    sr, audio = inp

    if fft_config.bandpass:
        audio = fish_filter(audio, fs=sr)

    audio = np.asarray(audio)
    if np.issubdtype(audio.dtype, np.integer):
        # librosa only accepts floating-point audio; integer PCM would
        # otherwise be rejected when the band-pass step (which already yields
        # floats) is disabled. Values are converted, not rescaled.
        audio = audio.astype(np.float64)

    spec = _spectrogram_for_display(audio, sr, fft_config)

    fig, ax = plt.subplots(1, 1)
    try:
        librosa.display.specshow(
            spec,
            sr=sr,
            hop_length=fft_config.hop_length,
            x_axis='time',
            y_axis=fft_config.y_axis,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax,
            cmap=fft_config.cmap,
            ax=ax,
            vmin=fft_config.vmin,
            vmax=fft_config.vmax,
        )
        # The image is meant to be a bare picture: no ticks, labels or frame.
        ax.set_axis_off()
        if fft_config.ylim is not None:
            ax.set_ylim(fft_config.ylim)

        if output:
            fig.savefig(output, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if drawing or saving failed, and
        # close only the figure created here so that figures owned by other
        # code are left alone.
        plt.close(fig)


def fish_filter(call, low=50, high=512, order=8, fs=22_050):
    """Band-pass `call` with a Butterworth filter.

    Args:
        call: Signal to filter; passed to `scipy.signal.sosfilt`.
        low: Lower band edge, in the same units as `fs`.
        high: Upper band edge, in the same units as `fs`.
        order: Order argument given to `scipy.signal.butter`.
        fs: Sampling frequency of `call`.

    Returns:
        The filtered signal produced by `scipy.signal.sosfilt`.
    """
    band_edges = [low, high]
    # Second-order sections are requested from butter and applied by sosfilt.
    sections = signal.butter(
        order,
        band_edges,
        btype='bandpass',
        output='sos',
        fs=fs,
    )
    filtered = signal.sosfilt(sections, call)
    return filtered