Spaces:

OfficerRaccoon
/

bird-sound-classifier-v2

Sleeping

App Files Files Community

OfficerRaccoon committed on Jun 27

Commit

ecef223

verified ·

1 Parent(s): eb3fddf

Upload 5 files

Browse files

Deploy bird sound classifier with 80% accuracy

Files changed (5) hide show

README.md +22 -20
app.py +296 -0
best_bird_model_extended.pth +3 -0
label_encoder.pkl +3 -0
requirements.txt +11 -3

README.md CHANGED Viewed

@@ -1,20 +1,22 @@
----
-title: Bird Sound Classifier V2
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: AI bird species identification from audio
-license: cc-by-nc-4.0
----
-# Welcome to Streamlit!
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

+---
+title: Bird Sound Classifier
+emoji: 🐦
+colorFrom: green
+colorTo: blue
+sdk: streamlit
+sdk_version: 1.28.1
+app_file: app.py
+pinned: false
+license: mit
+---
+# Bird Sound Classifier 🐦
+AI-powered bird species identification from audio recordings.
+## Features
+- 80% accuracy across 110+ bird species
+- Upload .mp3/.wav/.flac files
+- Real-time predictions with confidence scores
+Built for conservation efforts.

app.py ADDED Viewed

	@@ -0,0 +1,296 @@

+import streamlit as st
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchaudio
+import torchaudio.transforms as T
+import numpy as np
+import pickle
+import tempfile
+import os
+# Your model architecture (same as before)
+class ImprovedBirdSoundCNN(nn.Module):
+    def __init__(self, num_classes, dropout_rate=0.3):
+        super(ImprovedBirdSoundCNN, self).__init__()
+        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
+        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
+        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
+        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
+        self.conv6 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.bn2 = nn.BatchNorm2d(64)
+        self.bn3 = nn.BatchNorm2d(128)
+        self.bn4 = nn.BatchNorm2d(128)
+        self.bn5 = nn.BatchNorm2d(256)
+        self.bn6 = nn.BatchNorm2d(256)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))
+        self.dropout = nn.Dropout(dropout_rate)
+        self.fc1 = nn.Linear(256 * 4 * 4, 512)
+        self.fc2 = nn.Linear(512, 256)
+        self.fc3 = nn.Linear(256, num_classes)
+    def forward(self, x):
+        x = F.relu(self.bn1(self.conv1(x)))
+        x = F.relu(self.bn2(self.conv2(x)))
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = F.relu(self.bn3(self.conv3(x)))
+        x = F.relu(self.bn4(self.conv4(x)))
+        x = self.pool(x)
+        x = self.dropout(x)
+        x = F.relu(self.bn5(self.conv5(x)))
+        x = F.relu(self.bn6(self.conv6(x)))
+        x = self.adaptive_pool(x)
+        x = self.dropout(x)
+        x = x.view(x.size(0), -1)
+        x = F.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = F.relu(self.fc2(x))
+        x = self.dropout(x)
+        x = self.fc3(x)
+        return x
+@st.cache_resource
+def load_model_and_encoder():
+    """Load model and label encoder - cached for performance"""
+    device = torch.device('cpu')  # HF Spaces uses CPU
+    try:
+        # Load label encoder
+        with open('label_encoder.pkl', 'rb') as f:
+            label_encoder = pickle.load(f)
+        num_classes = len(label_encoder.classes_)
+        # Load model
+        model = ImprovedBirdSoundCNN(num_classes=num_classes)
+        checkpoint = torch.load('best_bird_model_extended.pth', map_location=device)
+        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
+            model.load_state_dict(checkpoint['model_state_dict'])
+        else:
+            model.load_state_dict(checkpoint)
+        model.eval()
+        return model, label_encoder, device
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None, None
+def preprocess_audio(audio_file, sample_rate=22050, duration=5):
+    """Preprocess audio for prediction"""
+    try:
+        # Load audio
+        waveform, sr = torchaudio.load(audio_file)
+        # Resample if necessary
+        if sr != sample_rate:
+            resampler = T.Resample(sr, sample_rate)
+            waveform = resampler(waveform)
+        # Convert to mono
+        if waveform.shape[0] > 1:
+            waveform = torch.mean(waveform, dim=0, keepdim=True)
+        # Normalize
+        waveform = waveform / (torch.max(torch.abs(waveform)) + 1e-8)
+        # Pad or trim
+        target_length = sample_rate * duration
+        if waveform.shape[1] > target_length:
+            start = (waveform.shape[1] - target_length) // 2
+            waveform = waveform[:, start:start + target_length]
+        else:
+            padding = target_length - waveform.shape[1]
+            waveform = torch.nn.functional.pad(waveform, (0, padding))
+        # Create spectrogram
+        mel_transform = T.MelSpectrogram(
+            sample_rate=sample_rate,
+            n_fft=2048,
+            hop_length=512,
+            n_mels=128,
+            f_min=0,
+            f_max=8000,
+            window_fn=torch.hann_window,
+            power=2.0
+        )
+        amplitude_to_db = T.AmplitudeToDB(stype='power', top_db=80)
+        mel_spec = mel_transform(waveform)
+        mel_spec_db = amplitude_to_db(mel_spec)
+        mel_spec_db = (mel_spec_db - mel_spec_db.mean()) / (mel_spec_db.std() + 1e-8)
+        return mel_spec_db.unsqueeze(0)
+    except Exception as e:
+        st.error(f"Error preprocessing audio: {str(e)}")
+        return None
+def predict_bird_species(model, spectrogram, label_encoder, device):
+    """Make prediction on spectrogram"""
+    try:
+        spectrogram = spectrogram.to(device)
+        with torch.no_grad():
+            outputs = model(spectrogram)
+            probabilities = torch.softmax(outputs, dim=1)
+            confidence, predicted = torch.max(probabilities, 1)
+            predicted_species = label_encoder.inverse_transform([predicted.item()])[0]
+            confidence_score = confidence.item()
+            # Get top 3 predictions
+            top3_probs, top3_indices = torch.topk(probabilities, 3, dim=1)
+            top3_species = []
+            for i in range(3):
+                species = label_encoder.inverse_transform([top3_indices[0][i].item()])[0]
+                prob = top3_probs[0][i].item()
+                top3_species.append((species, prob))
+        return predicted_species, confidence_score, top3_species
+    except Exception as e:
+        st.error(f"Error making prediction: {str(e)}")
+        return None, None, None
+def main():
+    st.set_page_config(
+        page_title="Bird Sound Classifier",
+        page_icon="🐦",
+        layout="wide"
+    )
+    st.title("🐦 AI Bird Sound Classifier")
+    st.markdown("### Upload a bird audio recording to identify the species!")
+    st.markdown("**Trained on 110+ species with 80% accuracy**")
+    # Sidebar
+    st.sidebar.header("🌿 About This App")
+    st.sidebar.info(
+        "This AI model identifies bird species from audio recordings using "
+        "deep learning on spectrograms. Perfect for conservation efforts!"
+    )
+    st.sidebar.header("📋 Instructions")
+    st.sidebar.markdown(
+        """
+        1. Upload an audio file (.mp3, .wav)
+        2. Click 'Identify Bird Species'
+        3. View predictions and confidence scores
+        4. Check alternative species suggestions
+        """
+    )
+    # Load model
+    model, label_encoder, device = load_model_and_encoder()
+    if model is None:
+        st.error("❌ Failed to load model. Please check the model files.")
+        st.stop()
+    st.success("✅ Model loaded successfully!")
+    # File upload
+    uploaded_file = st.file_uploader(
+        "Choose an audio file",
+        type=['mp3', 'wav', 'flac'],
+        help="Upload a bird sound recording (first 5 seconds will be analyzed)"
+    )
+    if uploaded_file is not None:
+        # Display file info
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("**📁 File Details:**")
+            st.write(f"• Name: {uploaded_file.name}")
+            st.write(f"• Size: {uploaded_file.size:,} bytes")
+        with col2:
+            st.write("**🎵 Audio Player:**")
+            st.audio(uploaded_file, format='audio/wav')
+        # Prediction button
+        if st.button("🔍 Identify Bird Species", type="primary", use_container_width=True):
+            with st.spinner("🔄 Processing audio and making prediction..."):
+                # Save uploaded file temporarily
+                with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+                    tmp_file.write(uploaded_file.getvalue())
+                    tmp_file_path = tmp_file.name
+                # Process and predict
+                spectrogram = preprocess_audio(tmp_file_path)
+                if spectrogram is not None:
+                    predicted_species, confidence, top3_predictions = predict_bird_species(
+                        model, spectrogram, label_encoder, device
+                    )
+                    # Clean up
+                    os.unlink(tmp_file_path)
+                    if predicted_species is not None:
+                        # Display results
+                        st.success("🎉 Prediction Complete!")
+                        # Main prediction
+                        st.subheader("🏆 Primary Prediction")
+                        clean_species = predicted_species.replace("_sound", "").replace("_", " ")
+                        col1, col2 = st.columns([2, 1])
+                        with col1:
+                            st.metric(
+                                label="Predicted Species",
+                                value=clean_species,
+                                delta=f"{confidence:.1%} confidence"
+                            )
+                        with col2:
+                            if confidence > 0.8:
+                                st.success("🎯 High Confidence")
+                            elif confidence > 0.6:
+                                st.warning("⚠️ Moderate Confidence")
+                            else:
+                                st.info("💭 Low Confidence")
+                        # Top 3 predictions
+                        st.subheader("📊 Alternative Predictions")
+                        for i, (species, prob) in enumerate(top3_predictions):
+                            clean_name = species.replace("_sound", "").replace("_", " ")
+                            st.write(f"**{i+1}.** {clean_name}")
+                            st.progress(prob)
+                            st.caption(f"Confidence: {prob:.1%}")
+                        # Conservation note
+                        st.subheader("🌿 Conservation Impact")
+                        st.info(
+                            f"Identifying '{clean_species}' helps with biodiversity monitoring "
+                            "and conservation efforts in national parks and protected areas."
+                        )
+                else:
+                    st.error("❌ Failed to process audio file.")
+    # Footer
+    st.markdown("---")
+    st.markdown(
+        "**🌍 Built for Conservation** | "
+        "This tool supports wildlife monitoring and biodiversity research."
+    )
+if __name__ == "__main__":
+    main()

best_bird_model_extended.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec8fe37088efd2913125f8af12564cb74ae2f00c6de14de4811c2227d5ad77c6
+size 133

label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5873dfe367f384888203d3e05f35af1e72484dd996b06cc5184b56b4f7d5bdb
+size 14832

requirements.txt CHANGED Viewed

@@ -1,3 +1,7 @@
-altair
-pandas
-streamlit

+streamlit==1.28.1
+torch==2.0.1
+torchaudio==2.0.2
+scikit-learn==1.3.0
+numpy==1.24.3
+altair
+pandas