amroa committed on
Commit
8c4ff63
·
1 Parent(s): c751d20
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
__pycache__/classpred.cpython-311.pyc CHANGED
Binary files a/__pycache__/classpred.cpython-311.pyc and b/__pycache__/classpred.cpython-311.pyc differ
 
app.py CHANGED
@@ -10,6 +10,11 @@ import torch
10
  import librosa
11
  import noisereduce as nr
12
  import timm
 
 
 
 
 
13
  import pandas as pd
14
  from classpred import predict_class
15
  import torch.nn.functional as F
@@ -134,13 +139,18 @@ def preprocess_for_inference(audio_arr, sr):
134
  results = []
135
  for idx, scores in zip(topk_indices, topk_values):
136
  species_name = species_id_to_name[idx.item()]
137
- probability = scores.item()
138
  results.append([species_name, probability])
139
 
140
  return results
141
 
142
  DESCRIPTION = """
143
- Bird audio classification using SOTA Voice of Jungle Technology.
 
 
 
 
 
144
  """
145
 
146
 
@@ -153,11 +163,59 @@ css = """
153
  height: 100%;
154
  }
155
  .column-container {
156
- height: 100%;
157
- }
158
  """
159
- with gr.Blocks(css = css) as demo:
160
- gr.Markdown("# Bird Species Audio Classification")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  gr.Markdown(DESCRIPTION)
162
 
163
  with gr.Row():
@@ -169,8 +227,8 @@ with gr.Blocks(css = css) as demo:
169
 
170
 
171
  with gr.Row():
172
- raw_class_output = gr.Dataframe(headers=["class", "score"], row_count=10, label="Class Prediction")
173
- species_output = gr.Dataframe(headers=["class", "score"], row_count=10, label="Species Prediction")
174
 
175
  with gr.Row():
176
  waveform_output = gr.Plot(label="Waveform")
 
10
  import librosa
11
  import noisereduce as nr
12
  import timm
13
+ from typing import Iterable
14
+ import gradio as gr
15
+ from gradio.themes.base import Base
16
+ from gradio.themes.utils import colors, fonts, sizes
17
+ import time
18
  import pandas as pd
19
  from classpred import predict_class
20
  import torch.nn.functional as F
 
139
  results = []
140
  for idx, scores in zip(topk_indices, topk_values):
141
  species_name = species_id_to_name[idx.item()]
142
+ probability = scores.item()*100
143
  results.append([species_name, probability])
144
 
145
  return results
146
 
147
  DESCRIPTION = """
148
+ # Bird audio classification using SOTA Voice of Jungle Technology. \n
149
+ # Introduction
150
+
151
+ It is estimated that 50% of the global economy is threatened by biodiversity loss. As such, efforts have been concentrated on estimating bird biodiversity, as birds are a top indicator of biodiversity in a region. One of these efforts is
152
+ finding the bird species in a region using bird species audio classification.
153
+ The left table shows predictions for the type of noise (class), while the right table shows predictions for the bird species. If the class prediction does not output bird, then the species prediction is consequently not confident.
154
  """
155
 
156
 
 
163
  height: 100%;
164
  }
165
  .column-container {
166
+ height: 100%;
167
+ }
168
  """
169
+
170
+
171
+
172
+
173
+ class Seafoam(Base):
174
+ def __init__(
175
+ self,
176
+ *,
177
+ primary_hue: colors.Color | str = colors.emerald,
178
+ secondary_hue: colors.Color | str = colors.blue,
179
+ neutral_hue: colors.Color | str = colors.gray,
180
+ spacing_size: sizes.Size | str = sizes.spacing_md,
181
+ radius_size: sizes.Size | str = sizes.radius_md,
182
+ text_size: sizes.Size | str = sizes.text_lg,
183
+ font: fonts.Font
184
+ | str
185
+ | Iterable[fonts.Font | str] = (
186
+ fonts.GoogleFont("Quicksand"),
187
+ "ui-sans-serif",
188
+ "sans-serif",
189
+ ),
190
+ font_mono: fonts.Font
191
+ | str
192
+ | Iterable[fonts.Font | str] = (
193
+ fonts.GoogleFont("IBM Plex Mono"),
194
+ "ui-monospace",
195
+ "monospace",
196
+ ),
197
+ ):
198
+ super().__init__(
199
+ primary_hue=primary_hue,
200
+ secondary_hue=secondary_hue,
201
+ neutral_hue=neutral_hue,
202
+ spacing_size=spacing_size,
203
+ radius_size=radius_size,
204
+ text_size=text_size,
205
+ font=font,
206
+ font_mono=font_mono,
207
+ )
208
+
209
+
210
+ seafoam = Seafoam()
211
+
212
+ ## logo: <img src="https://i.ibb.co/vcG9kr0/vojlogo.jpg" alt="vojlogo" border="0">
213
+ ## cactus: <img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0">
214
+ with gr.Blocks(theme=seafoam, css = css) as demo:
215
+ #img_src = '<img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0" width = "100%">'
216
+ #gr.Markdown(f"{img_src}")
217
+
218
+ #gr.Markdown(f"# Team Voice of Jungle {img_src} more text")
219
  gr.Markdown(DESCRIPTION)
220
 
221
  with gr.Row():
 
227
 
228
 
229
  with gr.Row():
230
+ raw_class_output = gr.Dataframe(headers=["Class", "Score [%]"], row_count=10, label="Class Prediction")
231
+ species_output = gr.Dataframe(headers=["Class", "Score [%]"], row_count=10, label="Species Prediction")
232
 
233
  with gr.Row():
234
  waveform_output = gr.Plot(label="Waveform")
classpred.py CHANGED
@@ -39,6 +39,6 @@ def predict_class(x, sr, start, end):
39
  logits = MODEL(x.view(1, 1, 1024, 128)).squeeze(0)
40
 
41
  topk_probs, topk_classes = logits.sigmoid().topk(10)
42
- preds = [[AUDIOSET_LABELS[cls], prob.item()] for cls, prob in zip(topk_classes, topk_probs)]
43
 
44
  return preds
 
39
  logits = MODEL(x.view(1, 1, 1024, 128)).squeeze(0)
40
 
41
  topk_probs, topk_classes = logits.sigmoid().topk(10)
42
+ preds = [[AUDIOSET_LABELS[cls], prob.item()*100] for cls, prob in zip(topk_classes, topk_probs)]
43
 
44
  return preds