jesse-lopez committed on
Commit e982ae0
1 Parent(s): 64643d4

test creation of app

app.py CHANGED
@@ -1,16 +1,79 @@
-import pandas
-import fastai
-import librosa
-import pandas
-import pydub
-import torchaudio
+"""App to demonstrate fish sound classifier.
+
+Includes code to create spectrograms from https://github.com/axiom-data-science/project-classify-fish-sounds
+which was copied to this dir and slightly modified for in-memory buffer because the archive repo is not pip installable.
+"""
+import io
+
+import fastai.vision.all as fai_vision
 import gradio as gr
-import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+
+from create_spectrograms import (
+    FFTConfig,
+    load_wav,
+    calc_stft,
+    plot_spec,
+    fish_filter
+)
+
+
+MODEL = fai_vision.load_learner('fish-sounds-resnet101-balanced-samples-n50')
+LABELS = {
+    0: 'No call',
+    1: 'Black grouper call 1',
+    2: 'Black grouper call 2',
+    3: 'Black grouper grunt',
+    4: 'Unidentified sound',
+    5: 'Red grouper 1',
+    6: 'Red grouper 2',
+    7: 'Red hind 1',
+    8: 'Red hind 2',
+    9: 'Red hind 3',
+    10: 'Goliath grouper',
+    11: 'Goliath grouper multi-phase'
+}
+FFT_CONFIG = FFTConfig()
+
+
+def classify_audio(inp, model=MODEL, labels=LABELS):
+    with Spectrogram(inp) as spec_buffer:
+        # Open spec from in-memory file as image
+        image_buffer = Image.open(spec_buffer)
+        # Cast to array, skip alpha channel
+        image_arr = np.array(image_buffer)[:, :, :3]
+
+    # Predict!
+    results = model.predict(image_arr)
+    # Return class labels and confidence value
+    confidences = {labels[i]: float(results[2][i]) for i in range(len(labels))}
+
+    return image_buffer, confidences
+
+
+class Spectrogram:
+
+    def __init__(self, inp, fft_config=FFT_CONFIG):
+        self.inp = inp
+        self.buffer = io.BytesIO()
+        self.fft_config = fft_config
 
+    def __enter__(self):
+        plot_spec(self.inp, self.buffer, self.fft_config)
+        return self.buffer
 
-def greet(name):
-    return f"hello {name}"
+    def __exit__(self, exc_typ, exc_value, exc_traceback):
+        self.buffer.close()
 
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+iface = gr.Interface(
+    fn=classify_audio,
+    inputs=gr.inputs.Audio(source="upload", type="numpy"),
+    outputs=[
+        gr.outputs.Image(),
+        gr.outputs.Label(num_top_classes=3),
+    ],
+    examples=["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"]
+)
 iface.launch()
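
For local testing outside Gradio, the new classify_audio can be called directly with the same (sample_rate, samples) tuple that gr.inputs.Audio(type="numpy") passes to it. A minimal sketch under that assumption, using scipy.io.wavfile (scipy is already a dependency) as a stand-in for the upload widget; this snippet is illustrative and not part of the commit:

# Hypothetical local smoke test for classify_audio.
# scipy.io.wavfile.read returns (rate, data), matching Gradio's numpy audio type.
from scipy.io import wavfile

sr, samples = wavfile.read("sample-0002.wav")
image, confidences = classify_audio((sr, samples))

# image is the PIL spectrogram shown in the UI; confidences maps each
# LABELS entry to the model's score, so the top classes can be inspected:
print(sorted(confidences.items(), key=lambda kv: kv[1], reverse=True)[:3])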
create_spectrograms.py ADDED
@@ -0,0 +1,106 @@
+"""Create spectrograms from audio files using matplotlib"""
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Tuple, Union
+
+import matplotlib as mpl
+mpl.use('Agg')
+import librosa
+import librosa.display
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.signal as signal
+
+logging.basicConfig(format='%(asctime)s: %(message)s', level=logging.INFO)
+
+
+@dataclass
+class FFTConfig():
+    n_fft: Union[int, None] = 2**12
+    win_length: Union[int, None] = None
+    hop_length: int = 512
+    sr: int = 22_050
+    db: bool = False
+    mel: bool = False
+    fmin: int = 50
+    fmax: int = 10_000
+    y_axis: str = 'linear'
+    denoise: Union[str, None] = None
+    pcen: bool = False
+    cmap: str = 'magma'
+    n_mels: int = 128
+    vmin: Union[float, None] = None
+    vmax: Union[float, None] = None
+    bandpass: bool = True
+    ylim: Union[Tuple[float, float], None] = (0, 512)
+
+def load_wav(fpath):
+    y, sr = librosa.load(fpath)
+    audio, _ = librosa.effects.trim(y)
+
+    return audio, sr
+
+
+def calc_stft(audio, fft_config):
+    stft = librosa.stft(audio, n_fft=fft_config.n_fft, hop_length=fft_config.hop_length, win_length=fft_config.win_length)
+    return np.abs(stft)
+
+
+def plot_spec(inp, output, fft_config: FFTConfig):
+    # Audio returns sr and audio! (opposite of librosa)
+    sr, audio = inp
+    fft_config.sr = sr
+    if fft_config.bandpass:
+        audio = fish_filter(audio, fs=sr)
+
+    stft = calc_stft(audio, fft_config)
+
+    if fft_config.pcen:
+        # Scale PCEN: https://librosa.org/doc/latest/generated/librosa.pcen.html?highlight=pcen#librosa.pcen
+        stft = librosa.pcen(stft * (2**31), sr=fft_config.sr, hop_length=fft_config.hop_length)
+        fft_config.db = True
+
+    if fft_config.mel:
+        stft = librosa.feature.melspectrogram(
+            y=audio,
+            sr=fft_config.sr,
+            n_mels=fft_config.n_mels,
+            fmin=fft_config.fmin,
+            fmax=fft_config.fmax
+        )
+        # Mel is in db
+        fft_config.db = True
+
+    if fft_config.db:
+        stft = librosa.amplitude_to_db(stft, ref=np.max)
+
+    fig, ax = plt.subplots(1, 1)
+    _ = librosa.display.specshow(
+        stft,
+        sr=fft_config.sr,
+        hop_length=fft_config.hop_length,
+        x_axis='time',
+        y_axis=fft_config.y_axis,
+        fmin=fft_config.fmin,
+        fmax=fft_config.fmax,
+        cmap=fft_config.cmap,
+        ax=ax,
+        vmin=fft_config.vmin,
+        vmax=fft_config.vmax
+    )
+    ax.set_axis_off()
+    if fft_config.ylim is not None:
+        ax.set_ylim(fft_config.ylim)
+
+    if output:
+        fig.savefig(output, bbox_inches='tight', pad_inches=0)
+        plt.close(fig=fig)
+
+    plt.close('all')
+
+
+def fish_filter(call, low=50, high=512, order=8, fs=22_050):
+    sos = signal.butter(order, [low, high], 'bandpass', output='sos', fs=fs)
+    return signal.sosfilt(sos, call)
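
create_spectrograms can also be driven on its own to write a spectrogram to disk instead of the in-memory buffer the app uses. A minimal sketch, assuming one of the bundled sample WAVs is present; note that plot_spec expects the Gradio-style (sr, audio) tuple, the reverse of what load_wav returns:

# Hypothetical standalone use of create_spectrograms (not part of the commit).
from create_spectrograms import FFTConfig, load_wav, plot_spec

audio, sr = load_wav("sample-0002.wav")  # librosa ordering: (audio, sr)
config = FFTConfig(pcen=True)            # enable PCEN scaling; other fields keep their defaults

# plot_spec accepts any savefig-compatible target, so a file path works
# the same as the io.BytesIO buffer the app passes in.
plot_spec((sr, audio), "sample-0002-spec.png", config)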
fish-sounds-resnet101-balanced-samples-n50 ADDED
@@ -0,0 +1 @@
+../../models/fish-sounds-resnet101-balanced-samples-n50
requirements.txt CHANGED
@@ -3,4 +3,4 @@ fastai
 matplotlib
 pandas
 pydub
-torchaudio
+scipy
sample-0002.wav ADDED
Binary file (221 kB).
sample-20088.wav ADDED
Binary file (328 kB).
sample-2990.wav ADDED
Binary file (213 kB).