Spaces:

AIOmarRehan
/

Deep_Audio_Classifier_using_CNN

Sleeping

App Files Files Community

AIOmarRehan committed 24 days ago

Commit

8c00eb3

verified ·

1 Parent(s): 904154d

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -19

app.py CHANGED Viewed

@@ -1,28 +1,41 @@
 import gradio as gr
 import numpy as np
-import librosa
 from PIL import Image
-import tempfile
-import os
 from app.preprocess import preprocess_audio
 from app.model import predict
 from collections import Counter, defaultdict
-# Process Image Input
 def process_image_input(img):
-    # Classify a spectrogram image directly using model.predict
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
-# Process Audio Input
 def process_audio_input(audio_path):
-    # audio_path = filepath from Gradio
-    # Preprocess → mel-spectrogram → predict per chunk
-    # Preprocess to mel-spectrogram chunk images
-    imgs = preprocess_audio(audio_path)
     all_preds = []
     all_confs = []
@@ -34,7 +47,7 @@ def process_audio_input(audio_path):
         all_confs.append(conf)
         all_probs.append(probs)
-    # Majority Vote
     counter = Counter(all_preds)
     max_count = max(counter.values())
     candidates = [k for k, v in counter.items() if v == max_count]
@@ -48,15 +61,17 @@ def process_audio_input(audio_path):
                 conf_sums[label] += all_confs[i]
         final_label = max(conf_sums, key=conf_sums.get)
-    final_conf = float(np.mean([all_confs[i] for i, l in enumerate(all_preds) if l == final_label]))
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
-# Main prediction logic
 def classify(audio_path, image):
-    # If an image is provided → classify directly
     if image is not None:
         label, conf, probs = process_image_input(image)
         return {
@@ -65,10 +80,9 @@ def classify(audio_path, image):
             "Details": probs
         }
-    # If an audio file is provided → preprocess and classify
     if audio_path is not None:
         label, conf, all_preds, all_confs = process_audio_input(audio_path)
         return {
             "Final Label": label,
             "Confidence": conf,
@@ -76,7 +90,6 @@ def classify(audio_path, image):
             "All Chunk Confidences": all_confs
         }
-    # Neither provided
     return "Please upload an audio file OR a spectrogram image."
@@ -85,7 +98,7 @@ interface = gr.Interface(
     fn=classify,
     inputs=[
         gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
-        gr.Image(type="pil", label="Upload Spectrogram Image")
     ],
     outputs=gr.JSON(label="Prediction Results"),
     title="General Audio Classifier (Audio + Spectrogram Support)",
@@ -93,7 +106,6 @@ interface = gr.Interface(
         "Upload a raw audio file OR a spectrogram image.\n"
         "If audio → model preprocesses into mel-spectrogram chunks.\n"
         "If image → model classifies the spectrogram directly.\n"
-        "Built using CNN + Mel-Spectrogram + Gradio."
     ),
 )

 import gradio as gr
 import numpy as np
 from PIL import Image
 from app.preprocess import preprocess_audio
 from app.model import predict
 from collections import Counter, defaultdict
+import librosa
+# IMAGE HANDLING
+def safe_load_image(img):
+    """
+    Ensure the input is a valid PIL RGBA image.
+    Gradio sometimes gives numpy arrays → we convert safely.
+    """
+    if img is None:
+        return None
+    # If numpy array → convert to PIL
+    if isinstance(img, np.ndarray):
+        img = Image.fromarray(img)
+    # Convert to RGBA so the image always carries an alpha channel
+    img = img.convert("RGBA")
+    return img
+# PROCESS SPECTROGRAM IMAGE
 def process_image_input(img):
+    img = safe_load_image(img)
     label, confidence, probs = predict(img)
     return label, round(confidence, 3), probs
+# PROCESS RAW AUDIO
 def process_audio_input(audio_path):
+    imgs = preprocess_audio(audio_path)  # returns list of PIL RGBA images
     all_preds = []
     all_confs = []
         all_confs.append(conf)
         all_probs.append(probs)
+    # Majority vote
     counter = Counter(all_preds)
     max_count = max(counter.values())
     candidates = [k for k, v in counter.items() if v == max_count]
                 conf_sums[label] += all_confs[i]
         final_label = max(conf_sums, key=conf_sums.get)
+    final_conf = float(
+        np.mean([all_confs[i] for i, lbl in enumerate(all_preds) if lbl == final_label])
+    )
     return final_label, round(final_conf, 3), all_preds, [round(c, 3) for c in all_confs]
+# MAIN CLASSIFIER
 def classify(audio_path, image):
+    # If spectrogram image
     if image is not None:
         label, conf, probs = process_image_input(image)
         return {
             "Details": probs
         }
+    # If raw audio
     if audio_path is not None:
         label, conf, all_preds, all_confs = process_audio_input(audio_path)
         return {
             "Final Label": label,
             "Confidence": conf,
             "All Chunk Confidences": all_confs
         }
     return "Please upload an audio file OR a spectrogram image."
     fn=classify,
     inputs=[
         gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
+        gr.Image(type="pil", label="Upload Spectrogram Image (PNG RGBA Supported)")
     ],
     outputs=gr.JSON(label="Prediction Results"),
     title="General Audio Classifier (Audio + Spectrogram Support)",
         "Upload a raw audio file OR a spectrogram image.\n"
         "If audio → model preprocesses into mel-spectrogram chunks.\n"
         "If image → model classifies the spectrogram directly.\n"
     ),
 )