"""App to demonstrate fish sound classifier. Includes code to create spectrograms from https://github.com/axiom-data-science/project-classify-fish-sounds which was copied to this dir and slightly modified for in-memory buffer because the archive repo is not pip installable. """ import io import fastai.vision.all as fai_vision import gradio as gr import numpy as np from huggingface_hub import from_pretrained_fastai from PIL import Image from create_spectrograms import ( FFTConfig, load_wav, calc_stft, plot_spec, fish_filter ) MODEL = from_pretrained_fastai('axds/classify-fish-sounds') LABELS = { 0: 'No call', 1: 'Black grouper call 1', 2: 'Black grouper call 2', 3: 'Black grouper grunt', 4: 'Unidentified sound', 5: 'Red grouper 1', 6: 'Red grouper 2', 7: 'Red hind 1', 8: 'Red hind 2', 9: 'Red hind 3', 10: 'Goliath grouper', 11: 'Goliath grouper multi-phase' } FFT_CONFIG = FFTConfig() def classify_audio(inp, model=MODEL, labels=LABELS): with Spectrogram(inp) as spec_buffer: # Open spec from in-memory file as image image_buffer = Image.open(spec_buffer) # Cast to array, skip alpha channel image_arr = np.array(image_buffer)[:, :, :3] # Predict! results = model.predict(image_arr) # Return class labels and confidence value confidences = {labels[i]: float(results[2][i]) for i in range(len(labels))} return image_buffer, confidences class Spectrogram: def __init__(self, inp, fft_config=FFT_CONFIG): self.inp = inp self.buffer = io.BytesIO() self.fft_config = fft_config def __enter__(self): plot_spec(self.inp, self.buffer, self.fft_config) return self.buffer def __exit__(self, exc_typ, exc_value, exc_traceback): self.buffer.close() iface = gr.Interface( fn=classify_audio, inputs=gr.inputs.Audio(source="upload", type="numpy"), outputs=[ gr.outputs.Image(), gr.outputs.Label(num_top_classes=3), ], examples=["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"], title="Classify fish sounds from audio files" ) iface.launch()