Spaces:

OfficerRaccoon
/

bird-sound-classifier-v2

Sleeping

App Files Files Community

OfficerRaccoon committed on Jun 27

Commit

a1f7c70

verified ·

1 Parent(s): a274886

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -85

app.py CHANGED Viewed

@@ -166,8 +166,11 @@ def load_model_and_encoder():
 # ------------------------------------------------------------------------------------------------------------------------------
 def preprocess_audio(uploaded_file, sample_rate=22050, duration=5):
-    """Process audio from Streamlit uploaded file without seek() method"""
     tmp_file_path = None
     try:
         # Get the raw bytes from Streamlit uploaded file
         audio_bytes = uploaded_file.getvalue()
@@ -176,7 +179,10 @@ def preprocess_audio(uploaded_file, sample_rate=22050, duration=5):
         # Create a unique temporary file path
         import hashlib
         file_hash = hashlib.md5(audio_bytes).hexdigest()[:8]
-        tmp_file_path = f"/tmp/audio_{file_hash}.wav"
         # Write bytes to temporary file
         with open(tmp_file_path, 'wb') as f:
@@ -194,31 +200,28 @@ def preprocess_audio(uploaded_file, sample_rate=22050, duration=5):
         st.write(f"✅ Created temp file: {file_size} bytes at {tmp_file_path}")
-        # Load audio with torchaudio
         try:
-            waveform, sr = torchaudio.load(tmp_file_path)
-            st.write(f"✅ Audio loaded: shape {waveform.shape}, sample rate {sr}")
         except Exception as load_error:
-            st.error(f"❌ torchaudio.load failed: {load_error}")
             return None
-        # Resample if necessary
-        if sr != sample_rate:
-            resampler = T.Resample(sr, sample_rate)
-            waveform = resampler(waveform)
-            st.write(f"✅ Resampled to {sample_rate} Hz")
-        # Convert to mono
-        if waveform.shape[0] > 1:
-            waveform = torch.mean(waveform, dim=0, keepdim=True)
-            st.write("✅ Converted to mono")
         # Normalize audio
         max_val = torch.max(torch.abs(waveform))
         if max_val > 0:
             waveform = waveform / max_val
-        # Pad or trim to fixed duration
         target_length = sample_rate * duration
         current_length = waveform.shape[1]
@@ -231,7 +234,7 @@ def preprocess_audio(uploaded_file, sample_rate=22050, duration=5):
             waveform = torch.nn.functional.pad(waveform, (0, padding))
             st.write(f"✅ Padded audio to {target_length} samples")
-        # Create mel spectrogram
         mel_transform = T.MelSpectrogram(
             sample_rate=sample_rate,
             n_fft=2048,
@@ -360,93 +363,31 @@ def main():
             st.write("**🎵 Audio Player:**")
             st.audio(uploaded_file, format='audio/wav')
         # Prediction button
         if st.button("🔍 Identify Bird Species", type="primary", use_container_width=True):
             with st.spinner("🔄 Processing audio and making prediction..."):
                 try:
-                    # Create temporary file with proper handling
-                    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
-                        # Write the uploaded file data
-                        tmp_file.write(uploaded_file.getvalue())
-                        tmp_file.flush()  # Ensure data is written
-                        tmp_file_path = tmp_file.name
-                    # Verify file was created successfully
-                    if not os.path.exists(tmp_file_path):
-                        st.error("❌ Failed to create temporary file")
-                        return
-                    file_size = os.path.getsize(tmp_file_path)
-                    if file_size == 0:
-                        st.error("❌ Temporary file is empty")
-                        return
-                    st.write(f"✅ Temporary file created: {file_size} bytes")
-                    # Process audio
-                    spectrogram = preprocess_audio(uploaded_file)
                     if spectrogram is not None:
                         predicted_species, confidence, top3_predictions = predict_bird_species(
                             model, spectrogram, label_encoder, device
                         )
-                        # Clean up temp file
-                        try:
-                            os.unlink(tmp_file_path)
-                        except:
-                            pass  # Ignore cleanup errors
                         # Display results
                         if predicted_species is not None:
                             st.success("🎉 Prediction Complete!")
-                            # Main prediction
-                            st.subheader("🏆 Primary Prediction")
-                            clean_species = predicted_species.replace("_sound", "").replace("_", " ")
-                            col1, col2 = st.columns([2, 1])
-                            with col1:
-                                st.metric(
-                                    label="Predicted Species",
-                                    value=clean_species,
-                                    delta=f"{confidence:.1%} confidence"
-                                )
-                            with col2:
-                                if confidence > 0.8:
-                                    st.success("🎯 High Confidence")
-                                elif confidence > 0.6:
-                                    st.warning("⚠️ Moderate Confidence")
-                                else:
-                                    st.info("💭 Low Confidence")
-                            # Top 3 predictions
-                            st.subheader("📊 Alternative Predictions")
-                            for i, (species, prob) in enumerate(top3_predictions):
-                                clean_name = species.replace("_sound", "").replace("_", " ")
-                                st.write(f"**{i+1}.** {clean_name}")
-                                st.progress(prob)
-                                st.caption(f"Confidence: {prob:.1%}")
-                            # Conservation note
-                            st.subheader("🌿 Conservation Impact")
-                            st.info(
-                                f"Identifying '{clean_species}' helps with biodiversity monitoring "
-                                "and conservation efforts in national parks and protected areas."
-                            )
                     else:
                         st.error("❌ Failed to process audio file.")
                 except Exception as e:
                     st.error(f"❌ Error processing audio: {str(e)}")
-                    # Clean up on error
-                    try:
-                        if 'tmp_file_path' in locals():
-                            os.unlink(tmp_file_path)
-                    except:
-                        pass
     # Footer
     st.markdown("---")

 # ------------------------------------------------------------------------------------------------------------------------------
 def preprocess_audio(uploaded_file, sample_rate=22050, duration=5):
+    """Process audio using librosa instead of torchaudio for better compatibility"""
+    import librosa
+    import numpy as np
     tmp_file_path = None
     try:
         # Get the raw bytes from Streamlit uploaded file
         audio_bytes = uploaded_file.getvalue()
         # Create a unique temporary file path
         import hashlib
         file_hash = hashlib.md5(audio_bytes).hexdigest()[:8]
+        # Determine file extension from uploaded file name
+        file_ext = uploaded_file.name.split('.')[-1].lower()
+        tmp_file_path = f"/tmp/audio_{file_hash}.{file_ext}"
         # Write bytes to temporary file
         with open(tmp_file_path, 'wb') as f:
         st.write(f"✅ Created temp file: {file_size} bytes at {tmp_file_path}")
+        # Load audio with librosa (more reliable than torchaudio)
         try:
+            # librosa can handle MP3, WAV, FLAC automatically
+            waveform, sr = librosa.load(tmp_file_path, sr=sample_rate, duration=duration)
+            st.write(f"✅ Audio loaded with librosa: shape {waveform.shape}, sample rate {sr}")
         except Exception as load_error:
+            st.error(f"❌ librosa.load failed: {load_error}")
             return None
+        # Convert numpy array to torch tensor
+        waveform = torch.from_numpy(waveform).float()
+        # Add channel dimension (librosa loads as 1D, we need 2D)
+        if len(waveform.shape) == 1:
+            waveform = waveform.unsqueeze(0)  # Shape: (1, time)
         # Normalize audio
         max_val = torch.max(torch.abs(waveform))
         if max_val > 0:
             waveform = waveform / max_val
+        # Ensure exact duration
         target_length = sample_rate * duration
         current_length = waveform.shape[1]
             waveform = torch.nn.functional.pad(waveform, (0, padding))
             st.write(f"✅ Padded audio to {target_length} samples")
+        # Create mel spectrogram using torchaudio transforms
         mel_transform = T.MelSpectrogram(
             sample_rate=sample_rate,
             n_fft=2048,
             st.write("**🎵 Audio Player:**")
             st.audio(uploaded_file, format='audio/wav')
+        # Prediction button
         # Prediction button
         if st.button("🔍 Identify Bird Species", type="primary", use_container_width=True):
             with st.spinner("🔄 Processing audio and making prediction..."):
                 try:
+                    # Process audio using librosa (more reliable)
+                    spectrogram = preprocess_audio_librosa(uploaded_file)
                     if spectrogram is not None:
                         predicted_species, confidence, top3_predictions = predict_bird_species(
                             model, spectrogram, label_encoder, device
                         )
                         # Display results
                         if predicted_species is not None:
                             st.success("🎉 Prediction Complete!")
+                            # Your existing result display code...
                     else:
                         st.error("❌ Failed to process audio file.")
                 except Exception as e:
                     st.error(f"❌ Error processing audio: {str(e)}")
     # Footer
     st.markdown("---")