voj

Sleeping

App Files Files Community

amroa committed on Jun 2, 2024

Commit

36dbf7a

1 Parent(s): 8c4ff63

themes

Browse files

Files changed (1) hide show

app.py +60 -12

app.py CHANGED Viewed

@@ -145,16 +145,33 @@ def preprocess_for_inference(audio_arr, sr):
     return results
 DESCRIPTION = """
-# Bird audio classification using SOTA Voice of Jungle Technology.  \n
 # Introduction
-It is esimated that 50% of the global economy is threatened by biodiversity loss. As such, efforts have been concerted into estimating bird biodiversity, as birds are a top indicator of biodiversity in the region. One of these efforts is
 finding the bird species in a region using bird species audio classification.
-Prediction on left table shows prediction on the type of noise (class), while the right predictions are the species of bird. If class prediction does not output bird, then consequently the species prediction is not confident.
 """
 css = """
 .number-input {
     height: 100%;
     padding-bottom: 60px; /* Adjust the value as needed for more or less space */
@@ -209,15 +226,42 @@ class Seafoam(Base):
 seafoam = Seafoam()
-## logo: <img src="https://i.ibb.co/vcG9kr0/vojlogo.jpg" alt="vojlogo" border="0">
-## cactus: <img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0">
-with gr.Blocks(theme=seafoam, css = css) as demo:
-    #img_src = '<img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0" width = "100%">'
-    #gr.Markdown(f"{img_src}")
-    #gr.Markdown(f"# Team Voice of Jungle {img_src}  more text")
     gr.Markdown(DESCRIPTION)
     with gr.Row():
         with gr.Column(elem_classes="column-container"):
             start_time_input = gr.Number(label="Start Time", value=0, elem_classes="number-input full-height")
@@ -236,7 +280,6 @@ with gr.Blocks(theme=seafoam, css = css) as demo:
     gr.Examples(
         examples=[
-            ["312_Cissopis_leverinia_1.wav", 0, 5],
             ["1094_Pionus_fuscus_2.wav", 0, 10],
         ],
         inputs=[audio_input, start_time_input, end_time_input]
@@ -244,4 +287,9 @@ with gr.Blocks(theme=seafoam, css = css) as demo:
     gr.Button("Predict").click(predict, [audio_input, start_time_input, end_time_input], [raw_class_output, species_output, waveform_output, spectrogram_output])
-demo.launch(share = True)

     return results
 DESCRIPTION = """
 # Introduction
+It is estimated that 50% of the global economy is threatened by biodiversity loss [2]. As such, intensive efforts have been directed toward estimating bird biodiversity, as birds are a top indicator of biodiversity in the region. One of these efforts is
 finding the bird species in a region using bird species audio classification.
+# Solution
+To tackle this problem, we propose VOJ. It first preprocesses an audio signal using a bandpass filter (1-8 kHz) and then downsamples it to 16 kHz. Afterwards, we input the signal into AudioMAE (Audio Masked AutoEncoder by Meta [1]), which extracts relevant features even in the presence of corruptions to the signal spectrogram.
+The AudioMAE is also trained on 527 types of audio that comprise bird, silence, environmental noise, and other types. The purpose of this initial inference stage is to provide an initial sense of the audio. If the AudioMAE outputs silence, we can expect low species prediction confidence, or if the output is insect, it may not be worth labelling.
+Next, we train BirdAST, which has an Audio Spectrogram Transformer (AST) as its backbone, followed by attention pooling and a dense layer. We also train EfficientB0 on the melspectrogram, and finally, we train a model using Wav2Vec pretrained on 50 bird species [3].
 """
 css = """
+#gradio-animation {
+    font-size: 2em;
+    font-weight: bold;
+    text-align: center;
+    margin-bottom: 20px;
+}
+.logo-container img {
+    width: 14%;  /* Adjust width as necessary */
+    display: block;
+    margin: auto;
+}
 .number-input {
     height: 100%;
     padding-bottom: 60px; /* Adjust the value as needed for more or less space */
 seafoam = Seafoam()
+js = """
+function createGradioAnimation() {
+    var container = document.getElementById('gradio-animation');
+    var text = 'Voice of Jungle';
+    for (var i = 0; i < text.length; i++) {
+        (function(i){
+            setTimeout(function(){
+                var letter = document.createElement('span');
+                letter.style.opacity = '0';
+                letter.style.transition = 'opacity 0.5s';
+                letter.innerText = text[i];
+                container.appendChild(letter);
+                setTimeout(function() {
+                    letter.style.opacity = '1';
+                }, 50);
+            }, i * 250);
+        })(i);
+    }
+}
+"""
+REFERENCES = """
+References
+[1] Huang, P.-Y., Xu, H., Li, J., Baevski, A., Auli, M., Galuba, W., Metze, F., & Feichtenhofer, C. (2022). Masked Autoencoders that Listen. In NeurIPS.
+[2] Torkington, S. (2023, February 7). 50% of the global economy is under threat from biodiversity loss. World Economic Forum. Retrieved from https://www.weforum.org/agenda/2023/02/biodiversity-nature-loss-cop15/.
+[3] https://www.kaggle.com/code/dima806/bird-species-by-sound-detection
+"""
+with gr.Blocks(theme = seafoam, css = css, js = js) as demo:
+    gr.Markdown('<div class="logo-container"><img src="https://i.ibb.co/vcG9kr0/vojlogo.jpg" width="50px" alt="vojlogo"></div>')
+    gr.Markdown('<div id="gradio-animation"></div>')
     gr.Markdown(DESCRIPTION)
     with gr.Row():
         with gr.Column(elem_classes="column-container"):
             start_time_input = gr.Number(label="Start Time", value=0, elem_classes="number-input full-height")
     gr.Examples(
         examples=[
             ["1094_Pionus_fuscus_2.wav", 0, 10],
         ],
         inputs=[audio_input, start_time_input, end_time_input]
     gr.Button("Predict").click(predict, [audio_input, start_time_input, end_time_input], [raw_class_output, species_output, waveform_output, spectrogram_output])
+    gr.Markdown(REFERENCES)
+demo.launch(share = True)
+## logo: <img src="https://i.ibb.co/vcG9kr0/vojlogo.jpg" alt="vojlogo" border="0">
+## cactus: <img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0">