jesse-lopez committed on
Commit e982ae0
1 Parent(s): 64643d4

test creation of app

app.py CHANGED
@@ -1,16 +1,79 @@
-import pandas
-import fastai
-import librosa
-import pandas
-import pydub
-import torchaudio
+"""App to demonstrate fish sound classifier.
+
+Includes code to create spectrograms from https://github.com/axiom-data-science/project-classify-fish-sounds
+which was copied to this dir and slightly modified for in-memory buffer because the archive repo is not pip installable.
+"""
+import io
+
+import fastai.vision.all as fai_vision
 import gradio as gr
-import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+
+from create_spectrograms import (
+    FFTConfig,
+    load_wav,
+    calc_stft,
+    plot_spec,
+    fish_filter
+)
+
+
+MODEL = fai_vision.load_learner('fish-sounds-resnet101-balanced-samples-n50')
+LABELS = {
+    0: 'No call',
+    1: 'Black grouper call 1',
+    2: 'Black grouper call 2',
+    3: 'Black grouper grunt',
+    4: 'Unidentified sound',
+    5: 'Red grouper 1',
+    6: 'Red grouper 2',
+    7: 'Red hind 1',
+    8: 'Red hind 2',
+    9: 'Red hind 3',
+    10: 'Goliath grouper',
+    11: 'Goliath grouper multi-phase'
+}
+FFT_CONFIG = FFTConfig()
+
+
+def classify_audio(inp, model=MODEL, labels=LABELS):
+    with Spectrogram(inp) as spec_buffer:
+        # Open spec from in-memory file as image
+        image_buffer = Image.open(spec_buffer)
+        # Cast to array, skip alpha channel
+        image_arr = np.array(image_buffer)[:, :, :3]
+
+    # Predict!
+    results = model.predict(image_arr)
+    # Return class labels and confidence value
+    confidences = {labels[i]: float(results[2][i]) for i in range(len(labels))}
+
+    return image_buffer, confidences
+
+
+class Spectrogram:
+
+    def __init__(self, inp, fft_config=FFT_CONFIG):
+        self.inp = inp
+        self.buffer = io.BytesIO()
+        self.fft_config = fft_config
 
+    def __enter__(self):
+        plot_spec(self.inp, self.buffer, self.fft_config)
+        return self.buffer
 
-def greet(name):
-    return f"hello {name}"
+    def __exit__(self, exc_typ, exc_value, exc_traceback):
+        self.buffer.close()
 
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+iface = gr.Interface(
+    fn=classify_audio,
+    inputs=gr.inputs.Audio(source="upload", type="numpy"),
+    outputs=[
+        gr.outputs.Image(),
+        gr.outputs.Label(num_top_classes=3),
+    ],
+    examples=["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"]
+)
 iface.launch()
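
For local testing outside Gradio, the new classify_audio can be called directly with the same (sample_rate, samples) tuple that gr.inputs.Audio(type="numpy") passes to it. A minimal sketch under that assumption, using scipy.io.wavfile (scipy is already a dependency) as a stand-in for the upload widget; this snippet is illustrative and not part of the commit:

# Hypothetical local smoke test for classify_audio.
# scipy.io.wavfile.read returns (rate, data), matching Gradio's numpy audio type.
from scipy.io import wavfile

sr, samples = wavfile.read("sample-0002.wav")
image, confidences = classify_audio((sr, samples))

# image is the PIL spectrogram shown in the UI; confidences maps each
# LABELS entry to the model's score, so the top classes can be inspected:
print(sorted(confidences.items(), key=lambda kv: kv[1], reverse=True)[:3])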
create_spectrograms.py ADDED
@@ -0,0 +1,106 @@
+"""Create spectrograms from audio files using matplotlib"""
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Tuple, Union
+
+import matplotlib as mpl
+mpl.use('Agg')
+import librosa
+import librosa.display
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.signal as signal
+
+logging.basicConfig(format='%(asctime)s: %(message)s', level=logging.INFO)
+
+
+@dataclass
+class FFTConfig():
+    n_fft: Union[int, None] = 2**12
+    win_length: Union[int, None] = None
+    hop_length: int = 512
+    sr: int = 22_050
+    db: bool = False
+    mel: bool = False
+    fmin: int = 50
+    fmax: int = 10_000
+    y_axis: str = 'linear'
+    denoise: Union[str, None] = None
+    pcen: bool = False
+    cmap: str = 'magma'
+    n_mels: int = 128
+    vmin: Union[float, None] = None
+    vmax: Union[float, None] = None
+    bandpass: bool = True
+    ylim: Union[Tuple[float, float], None] = (0, 512)
+
+def load_wav(fpath):
+    y, sr = librosa.load(fpath)
+    audio, _ = librosa.effects.trim(y)
+
+    return audio, sr
+
+
+def calc_stft(audio, fft_config):
+    stft = librosa.stft(audio, n_fft=fft_config.n_fft, hop_length=fft_config.hop_length, win_length=fft_config.win_length)
+    return np.abs(stft)
+
+
+def plot_spec(inp, output, fft_config: FFTConfig):
+    # Audio returns sr and audio! (opposite of librosa)
+    sr, audio = inp
+    fft_config.sr = sr
+    if fft_config.bandpass:
+        audio = fish_filter(audio, fs=sr)
+
+    stft = calc_stft(audio, fft_config)
+
+    if fft_config.pcen:
+        # Scale PCEN: https://librosa.org/doc/latest/generated/librosa.pcen.html?highlight=pcen#librosa.pcen
+        stft = librosa.pcen(stft * (2**31), sr=fft_config.sr, hop_length=fft_config.hop_length)
+        fft_config.db = True
+
+    if fft_config.mel:
+        stft = librosa.feature.melspectrogram(
+            y=audio,
+            sr=fft_config.sr,
+            n_mels=fft_config.n_mels,
+            fmin=fft_config.fmin,
+            fmax=fft_config.fmax
+        )
+        # Mel is in db
+        fft_config.db = True
+
+    if fft_config.db:
+        stft = librosa.amplitude_to_db(stft, ref=np.max)
+
+    fig, ax = plt.subplots(1, 1)
+    _ = librosa.display.specshow(
+        stft,
+        sr=fft_config.sr,
+        hop_length=fft_config.hop_length,
+        x_axis='time',
+        y_axis=fft_config.y_axis,
+        fmin=fft_config.fmin,
+        fmax=fft_config.fmax,
+        cmap=fft_config.cmap,
+        ax=ax,
+        vmin=fft_config.vmin,
+        vmax=fft_config.vmax
+    )
+    ax.set_axis_off()
+    if fft_config.ylim is not None:
+        ax.set_ylim(fft_config.ylim)
+
+    if output:
+        fig.savefig(output, bbox_inches='tight', pad_inches=0)
+        plt.close(fig=fig)
+
+    plt.close('all')
+
+
+def fish_filter(call, low=50, high=512, order=8, fs=22_050):
+    sos = signal.butter(order, [low, high], 'bandpass', output='sos', fs=fs)
+    return signal.sosfilt(sos, call)
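
create_spectrograms can also be driven on its own to write a spectrogram to disk instead of the in-memory buffer the app uses. A minimal sketch, assuming one of the bundled sample WAVs is present; note that plot_spec expects the Gradio-style (sr, audio) tuple, the reverse of what load_wav returns:

# Hypothetical standalone use of create_spectrograms (not part of the commit).
from create_spectrograms import FFTConfig, load_wav, plot_spec

audio, sr = load_wav("sample-0002.wav")  # librosa ordering: (audio, sr)
config = FFTConfig(pcen=True)            # enable PCEN scaling; other fields keep their defaults

# plot_spec accepts any savefig-compatible target, so a file path works
# the same as the io.BytesIO buffer the app passes in.
plot_spec((sr, audio), "sample-0002-spec.png", config)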
fish-sounds-resnet101-balanced-samples-n50 ADDED
@@ -0,0 +1 @@
+../../models/fish-sounds-resnet101-balanced-samples-n50
requirements.txt CHANGED
@@ -3,4 +3,4 @@ fastai
 matplotlib
 pandas
 pydub
-torchaudio
+scipy
sample-0002.wav ADDED
Binary file (221 kB).
sample-20088.wav ADDED
Binary file (328 kB).
sample-2990.wav ADDED
Binary file (213 kB).