Spaces:
Runtime error
Runtime error
File size: 2,187 Bytes
e982ae0 d8186c5 e982ae0 76168a0 e982ae0 76168a0 e982ae0 d8186c5 e982ae0 d8186c5 e982ae0 d8186c5 e982ae0 76168a0 e982ae0 d8186c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
"""App to demonstrate fish sound classifier.
Includes the spectrogram-creation code from https://github.com/axiom-data-science/project-classify-fish-sounds,
which was copied into this directory and slightly modified to write to an in-memory buffer, because the archive repo is not pip-installable.
"""
import io
import fastai.vision.all as fai_vision
import gradio as gr
import numpy as np
from huggingface_hub import from_pretrained_fastai
from PIL import Image
from create_spectrograms import (
FFTConfig,
load_wav,
calc_stft,
plot_spec,
fish_filter
)
# Classifier obtained via `from_pretrained_fastai` for the repo id
# 'axds/classify-fish-sounds'. Loaded once at import time and used as the
# default `model` argument of `classify_audio`.
MODEL = from_pretrained_fastai('axds/classify-fish-sounds')
# Maps the integer class index (0-11) to its human-readable call name.
# The position of each name in the tuple below is its class index.
LABELS = dict(enumerate((
    'No call',
    'Black grouper call 1',
    'Black grouper call 2',
    'Black grouper grunt',
    'Unidentified sound',
    'Red grouper 1',
    'Red grouper 2',
    'Red hind 1',
    'Red hind 2',
    'Red hind 3',
    'Goliath grouper',
    'Goliath grouper multi-phase',
)))
# FFT settings built from FFTConfig's own defaults; used as the default
# `fft_config` argument of `Spectrogram`.
FFT_CONFIG = FFTConfig()
def classify_audio(inp, model=MODEL, labels=LABELS):
    """Render a spectrogram of the audio input and classify it.

    Args:
        inp: Audio input, passed unchanged to ``Spectrogram``. In this app it
            is whatever the Gradio audio component (``type="numpy"``) supplies.
        model: Object whose ``predict`` method is called with the image array.
            Element ``[2]`` of its result is read as the per-class scores.
        labels: Index-to-name lookup; entries ``0 .. len(labels) - 1`` are
            read.

    Returns:
        A ``(image, confidences)`` pair: the spectrogram as a PIL image, and a
        dict mapping each label name to its score as a ``float``.
    """
    with Spectrogram(inp) as png_stream:
        # The rendered spectrogram lives only in memory; read it back as an image.
        spectrogram_image = Image.open(png_stream)

        # Keep only the first three channels (drops the alpha channel).
        rgb_pixels = np.array(spectrogram_image)[:, :, :3]

        prediction = model.predict(rgb_pixels)

        # Pair every label with the score found at the same class index.
        confidences = {}
        for class_index in range(len(labels)):
            confidences[labels[class_index]] = float(prediction[2][class_index])

        return spectrogram_image, confidences
class Spectrogram:
    """Context manager that plots a spectrogram of ``inp`` into memory.

    Entering the context calls ``plot_spec`` to write into the instance's
    ``io.BytesIO`` buffer and yields that buffer; leaving the context closes it.
    """

    def __init__(self, inp, fft_config=FFT_CONFIG):
        """Store the input and FFT settings and create the empty output buffer."""
        self.fft_config = fft_config
        self.buffer = io.BytesIO()
        self.inp = inp

    def __enter__(self):
        """Plot the spectrogram into the buffer and hand the buffer to the caller."""
        target = self.buffer
        plot_spec(self.inp, target, self.fft_config)
        return target

    def __exit__(self, exc_typ, exc_value, exc_traceback):
        """Close the buffer; an exception raised in the block is not suppressed."""
        self.buffer.close()
# Gradio front end: one uploaded audio clip in; the spectrogram image and the
# three top-scoring class labels out.
# NOTE(review): `gr.inputs` / `gr.outputs` are legacy namespaces in newer
# Gradio releases — confirm against the Gradio version this app is pinned to.
_audio_input = gr.inputs.Audio(source="upload", type="numpy")
_result_views = [
    gr.outputs.Image(),
    gr.outputs.Label(num_top_classes=3),
]

iface = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_result_views,
    examples=["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"],
    title="Classify fish sounds from audio files",
)

# Start serving the interface as soon as the module is executed.
iface.launch()
|