File size: 2,187 Bytes
e982ae0
 
 
 
 
 
 
 
d8186c5
e982ae0
76168a0
e982ae0
 
 
 
 
 
 
 
 
 
 
76168a0
e982ae0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8186c5
e982ae0
 
 
d8186c5
e982ae0
 
d8186c5
 
e982ae0
 
 
 
 
 
 
76168a0
 
e982ae0
d8186c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""App to demonstrate fish sound classifier.

Includes code to create spectrograms from https://github.com/axiom-data-science/project-classify-fish-sounds
which was copied to this dir and slightly modified for in-memory buffer because the archive repo is not pip installable.
"""
import io

import fastai.vision.all as fai_vision
import gradio as gr
import numpy as np
from huggingface_hub import from_pretrained_fastai
from PIL import Image

from create_spectrograms import (
    FFTConfig,
    load_wav,
    calc_stft,
    plot_spec,
    fish_filter
)


# Pretrained fastai classifier published on the Hugging Face Hub.
MODEL = from_pretrained_fastai('axds/classify-fish-sounds')

# Class index -> human-readable label, ordered to match the model's output.
LABELS = dict(enumerate([
    'No call',
    'Black grouper call 1',
    'Black grouper call 2',
    'Black grouper grunt',
    'Unidentified sound',
    'Red grouper 1',
    'Red grouper 2',
    'Red hind 1',
    'Red hind 2',
    'Red hind 3',
    'Goliath grouper',
    'Goliath grouper multi-phase',
]))

# Default FFT parameters shared by every spectrogram the app renders.
FFT_CONFIG = FFTConfig()


def classify_audio(inp, model=MODEL, labels=LABELS):
    """Classify fish sounds in an audio clip via its spectrogram.

    Args:
        inp: Audio input as delivered by the gradio Audio component;
            passed straight through to the spectrogram plotting code.
        model: fastai learner used for prediction (defaults to the
            pretrained model loaded at module import).
        labels: Mapping of class index -> human-readable label.

    Returns:
        Tuple of (spectrogram PIL image, dict mapping label -> confidence).
    """
    with Spectrogram(inp) as spec_buffer:
        # Open the rendered spectrogram from the in-memory file as an image.
        image_buffer = Image.open(spec_buffer)
        # Image.open is lazy; force the pixel data to be decoded now, because
        # the backing buffer is closed when the context exits and the returned
        # image must not depend on it afterwards.
        image_buffer.load()
        # Cast to array, dropping the alpha channel the plot includes.
        image_arr = np.array(image_buffer)[:, :, :3]

    # Predict!  fastai's predict returns (decoded, class index, probabilities).
    results = model.predict(image_arr)
    # Iterate labels.items() rather than range(len(labels)) so the mapping
    # still works if the label keys are not a contiguous 0..n-1 range.
    confidences = {label: float(results[2][i]) for i, label in labels.items()}

    return image_buffer, confidences


class Spectrogram:
    """Context manager yielding an in-memory spectrogram image file.

    On entry, a spectrogram of the audio input is rendered into a BytesIO
    buffer and that buffer is returned; on exit the buffer is closed, so
    callers must consume it inside the ``with`` block.
    """

    def __init__(self, inp, fft_config=FFT_CONFIG):
        self.inp = inp
        self.fft_config = fft_config
        # In-memory file that receives the rendered spectrogram plot.
        self.buffer = io.BytesIO()

    def __enter__(self):
        # Render the spectrogram straight into the in-memory file.
        plot_spec(self.inp, self.buffer, self.fft_config)
        return self.buffer

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always release the buffer; returning None lets exceptions propagate.
        self.buffer.close()


# Sample clips shown as one-click examples in the UI.
_EXAMPLE_CLIPS = ["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"]

# Wire the classifier into a gradio interface and serve it.
# NOTE(review): gr.inputs / gr.outputs is the legacy gradio 2.x component
# API (removed in gradio 3+) — confirm the pinned gradio version before
# upgrading dependencies.
iface = gr.Interface(
    fn=classify_audio,
    inputs=gr.inputs.Audio(source="upload", type="numpy"),
    outputs=[gr.outputs.Image(), gr.outputs.Label(num_top_classes=3)],
    examples=_EXAMPLE_CLIPS,
    title="Classify fish sounds from audio files",
)
iface.launch()