# jesse-lopez's picture
# Use model pushed to hf-hub
# 76168a0
"""App to demonstrate fish sound classifier.
Includes code to create spectrograms from https://github.com/axiom-data-science/project-classify-fish-sounds
which was copied to this dir and slightly modified for in-memory buffer because the archive repo is not pip installable.
"""
import io
import fastai.vision.all as fai_vision
import gradio as gr
import numpy as np
from huggingface_hub import from_pretrained_fastai
from PIL import Image
from create_spectrograms import (
FFTConfig,
load_wav,
calc_stft,
plot_spec,
fish_filter
)
# Classifier fetched from the Hugging Face Hub repo 'axds/classify-fish-sounds'.
# This runs at import time, so importing this module requires the repo to be
# reachable (or already cached locally).
MODEL = from_pretrained_fastai('axds/classify-fish-sounds')
# Human-readable class names keyed by integer class index.
# The position of each name in the tuple below is its class index.
LABELS = dict(enumerate((
    'No call',
    'Black grouper call 1',
    'Black grouper call 2',
    'Black grouper grunt',
    'Unidentified sound',
    'Red grouper 1',
    'Red grouper 2',
    'Red hind 1',
    'Red hind 2',
    'Red hind 3',
    'Goliath grouper',
    'Goliath grouper multi-phase',
)))
# FFT settings built with FFTConfig's defaults; used as the default
# `fft_config` for every Spectrogram created in this module.
FFT_CONFIG = FFTConfig()
def classify_audio(inp, model=MODEL, labels=LABELS):
    """Classify an audio clip and return its spectrogram with class scores.

    Args:
        inp: Audio input as delivered by the Gradio audio component; it is
            passed unchanged to ``Spectrogram``.
        model: Object exposing ``predict(array)``. The third element of its
            result is indexed per class (presumably per-class probabilities
            from a fastai learner; confirm against the model).
        labels: Mapping from class index (``0 .. len(labels) - 1``) to a
            human-readable class name.

    Returns:
        A ``(image, scores)`` tuple: the spectrogram as a PIL image and a
        dict mapping each class name to its score as a ``float``.
    """
    with Spectrogram(inp) as spec_file:
        # Read the rendered spectrogram back from the in-memory file.
        spectrogram = Image.open(spec_file)
        # Materialise the pixels while the buffer is still open and keep
        # only the first three channels (drops alpha).
        pixels = np.array(spectrogram)[:, :, :3]

    prediction = model.predict(pixels)
    per_class = prediction[2]

    # Pair every class name with its score as a plain Python float.
    scores = {}
    for class_idx in range(len(labels)):
        scores[labels[class_idx]] = float(per_class[class_idx])

    return spectrogram, scores
class Spectrogram:
    """Context manager yielding an in-memory file holding a spectrogram.

    On entry, ``plot_spec`` is asked to write the spectrogram for ``inp``
    into an ``io.BytesIO`` buffer, which is returned rewound to its start.
    On exit the buffer is closed, so any data needed afterwards must be
    read inside the ``with`` block.

    Args:
        inp: Audio input forwarded unchanged to ``plot_spec``.
        fft_config: FFT settings forwarded to ``plot_spec``; defaults to
            the module-level ``FFT_CONFIG``.
    """

    def __init__(self, inp, fft_config=FFT_CONFIG):
        self.inp = inp
        self.buffer = io.BytesIO()
        self.fft_config = fft_config

    def __enter__(self):
        """Render the spectrogram into the buffer and return the buffer."""
        try:
            plot_spec(self.inp, self.buffer, self.fft_config)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so release
            # the buffer here before propagating the error.
            self.buffer.close()
            raise
        # Hand the buffer back positioned at the start so that any reader,
        # not only ones that rewind themselves, sees the written data.
        self.buffer.seek(0)
        return self.buffer

    def __exit__(self, exc_typ, exc_value, exc_traceback):
        """Close the buffer; exceptions from the body are not suppressed."""
        self.buffer.close()
# Gradio UI wiring: an uploaded audio file is passed to classify_audio, and
# its two return values feed the two output components in order.
iface = gr.Interface(
fn=classify_audio,
# Uploaded audio is delivered to the function in Gradio's "numpy" format.
inputs=gr.inputs.Audio(source="upload", type="numpy"),
# First output: the spectrogram image; second: the label confidences
# (num_top_classes=3 limits the display to the three best classes).
outputs=[
gr.outputs.Image(),
gr.outputs.Label(num_top_classes=3),
],
# Example clips offered in the UI; paths are relative, so the files are
# expected alongside this script.
examples=["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"],
title="Classify fish sounds from audio files"
)
# Start serving the interface as soon as this module is executed.
iface.launch()