themes
Browse files
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/classpred.cpython-311.pyc +0 -0
- app.py +66 -8
- classpred.py +1 -1
__pycache__/app.cpython-311.pyc
CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
__pycache__/classpred.cpython-311.pyc
CHANGED
Binary files a/__pycache__/classpred.cpython-311.pyc and b/__pycache__/classpred.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -10,6 +10,11 @@ import torch
|
|
10 |
import librosa
|
11 |
import noisereduce as nr
|
12 |
import timm
|
|
|
|
|
|
|
|
|
|
|
13 |
import pandas as pd
|
14 |
from classpred import predict_class
|
15 |
import torch.nn.functional as F
|
@@ -134,13 +139,18 @@ def preprocess_for_inference(audio_arr, sr):
|
|
134 |
results = []
|
135 |
for idx, scores in zip(topk_indices, topk_values):
|
136 |
species_name = species_id_to_name[idx.item()]
|
137 |
-
probability = scores.item()
|
138 |
results.append([species_name, probability])
|
139 |
|
140 |
return results
|
141 |
|
142 |
DESCRIPTION = """
|
143 |
-
Bird audio classification using SOTA Voice of Jungle Technology.
|
|
|
|
|
|
|
|
|
|
|
144 |
"""
|
145 |
|
146 |
|
@@ -153,11 +163,59 @@ css = """
|
|
153 |
height: 100%;
|
154 |
}
|
155 |
.column-container {
|
156 |
-
height: 100%;
|
157 |
-
}
|
158 |
"""
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
gr.Markdown(DESCRIPTION)
|
162 |
|
163 |
with gr.Row():
|
@@ -169,8 +227,8 @@ with gr.Blocks(css = css) as demo:
|
|
169 |
|
170 |
|
171 |
with gr.Row():
|
172 |
-
raw_class_output = gr.Dataframe(headers=["
|
173 |
-
species_output = gr.Dataframe(headers=["
|
174 |
|
175 |
with gr.Row():
|
176 |
waveform_output = gr.Plot(label="Waveform")
|
|
|
10 |
import librosa
|
11 |
import noisereduce as nr
|
12 |
import timm
|
13 |
+
from typing import Iterable
|
14 |
+
import gradio as gr
|
15 |
+
from gradio.themes.base import Base
|
16 |
+
from gradio.themes.utils import colors, fonts, sizes
|
17 |
+
import time
|
18 |
import pandas as pd
|
19 |
from classpred import predict_class
|
20 |
import torch.nn.functional as F
|
|
|
139 |
results = []
|
140 |
for idx, scores in zip(topk_indices, topk_values):
|
141 |
species_name = species_id_to_name[idx.item()]
|
142 |
+
probability = scores.item()*100
|
143 |
results.append([species_name, probability])
|
144 |
|
145 |
return results
|
146 |
|
147 |
DESCRIPTION = """
|
148 |
+
# Bird audio classification using SOTA Voice of Jungle Technology. \n
|
149 |
+
# Introduction
|
150 |
+
|
151 |
+
It is estimated that 50% of the global economy is threatened by biodiversity loss. As such, concerted efforts have been made to estimate bird biodiversity, as birds are a top indicator of biodiversity in a region. One of these efforts is
|
152 |
+
finding the bird species in a region using bird species audio classification.
|
153 |
+
The left table shows the predicted type of sound (class), while the right table shows the predicted bird species. If the class prediction does not include a bird, then the species prediction should not be considered confident.
|
154 |
"""
|
155 |
|
156 |
|
|
|
163 |
height: 100%;
|
164 |
}
|
165 |
.column-container {
|
166 |
+
height: 100%;
|
167 |
+
}
|
168 |
"""
|
169 |
+
|
170 |
+
|
171 |
+
|
172 |
+
|
173 |
+
class Seafoam(Base):
|
174 |
+
def __init__(
|
175 |
+
self,
|
176 |
+
*,
|
177 |
+
primary_hue: colors.Color | str = colors.emerald,
|
178 |
+
secondary_hue: colors.Color | str = colors.blue,
|
179 |
+
neutral_hue: colors.Color | str = colors.gray,
|
180 |
+
spacing_size: sizes.Size | str = sizes.spacing_md,
|
181 |
+
radius_size: sizes.Size | str = sizes.radius_md,
|
182 |
+
text_size: sizes.Size | str = sizes.text_lg,
|
183 |
+
font: fonts.Font
|
184 |
+
| str
|
185 |
+
| Iterable[fonts.Font | str] = (
|
186 |
+
fonts.GoogleFont("Quicksand"),
|
187 |
+
"ui-sans-serif",
|
188 |
+
"sans-serif",
|
189 |
+
),
|
190 |
+
font_mono: fonts.Font
|
191 |
+
| str
|
192 |
+
| Iterable[fonts.Font | str] = (
|
193 |
+
fonts.GoogleFont("IBM Plex Mono"),
|
194 |
+
"ui-monospace",
|
195 |
+
"monospace",
|
196 |
+
),
|
197 |
+
):
|
198 |
+
super().__init__(
|
199 |
+
primary_hue=primary_hue,
|
200 |
+
secondary_hue=secondary_hue,
|
201 |
+
neutral_hue=neutral_hue,
|
202 |
+
spacing_size=spacing_size,
|
203 |
+
radius_size=radius_size,
|
204 |
+
text_size=text_size,
|
205 |
+
font=font,
|
206 |
+
font_mono=font_mono,
|
207 |
+
)
|
208 |
+
|
209 |
+
|
210 |
+
seafoam = Seafoam()
|
211 |
+
|
212 |
+
## logo: <img src="https://i.ibb.co/vcG9kr0/vojlogo.jpg" alt="vojlogo" border="0">
|
213 |
+
## cactus: <img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0">
|
214 |
+
with gr.Blocks(theme=seafoam, css = css) as demo:
|
215 |
+
#img_src = '<img src="https://i.ibb.co/3sW2mJN/spur.jpg" alt="spur" border="0" width = "100%">'
|
216 |
+
#gr.Markdown(f"{img_src}")
|
217 |
+
|
218 |
+
#gr.Markdown(f"# Team Voice of Jungle {img_src} more text")
|
219 |
gr.Markdown(DESCRIPTION)
|
220 |
|
221 |
with gr.Row():
|
|
|
227 |
|
228 |
|
229 |
with gr.Row():
|
230 |
+
raw_class_output = gr.Dataframe(headers=["Class", "Score [%]"], row_count=10, label="Class Prediction")
|
231 |
+
species_output = gr.Dataframe(headers=["Class", "Score [%]"], row_count=10, label="Species Prediction")
|
232 |
|
233 |
with gr.Row():
|
234 |
waveform_output = gr.Plot(label="Waveform")
|
classpred.py
CHANGED
@@ -39,6 +39,6 @@ def predict_class(x, sr, start, end):
|
|
39 |
logits = MODEL(x.view(1, 1, 1024, 128)).squeeze(0)
|
40 |
|
41 |
topk_probs, topk_classes = logits.sigmoid().topk(10)
|
42 |
-
preds = [[AUDIOSET_LABELS[cls], prob.item()] for cls, prob in zip(topk_classes, topk_probs)]
|
43 |
|
44 |
return preds
|
|
|
39 |
logits = MODEL(x.view(1, 1, 1024, 128)).squeeze(0)
|
40 |
|
41 |
topk_probs, topk_classes = logits.sigmoid().topk(10)
|
42 |
+
preds = [[AUDIOSET_LABELS[cls], prob.item()*100] for cls, prob in zip(topk_classes, topk_probs)]
|
43 |
|
44 |
return preds
|