Spaces:
Sleeping
Sleeping
BilalSardar
committed on
Commit
•
e1490d6
1
Parent(s):
81932e3
Upload 3 files
Browse files- .gitattributes +1 -0
- example/soundscape.wav +3 -0
- example/species_list.txt +48 -0
- segments.py +305 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
example/soundscape.wav filter=lfs diff=lfs merge=lfs -text
|
example/soundscape.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df312b45bc82ce4c638c3e9e09d748702ea14a91ec29e4e8e0676d3e3e015fd7
|
3 |
+
size 11520046
|
example/species_list.txt
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Accipiter cooperii_Cooper's Hawk
|
2 |
+
Agelaius phoeniceus_Red-winged Blackbird
|
3 |
+
Anas platyrhynchos_Mallard
|
4 |
+
Anas rubripes_American Black Duck
|
5 |
+
Ardea herodias_Great Blue Heron
|
6 |
+
Baeolophus bicolor_Tufted Titmouse
|
7 |
+
Branta canadensis_Canada Goose
|
8 |
+
Bucephala albeola_Bufflehead
|
9 |
+
Bucephala clangula_Common Goldeneye
|
10 |
+
Buteo jamaicensis_Red-tailed Hawk
|
11 |
+
Cardinalis cardinalis_Northern Cardinal
|
12 |
+
Certhia americana_Brown Creeper
|
13 |
+
Colaptes auratus_Northern Flicker
|
14 |
+
Columba livia_Rock Pigeon
|
15 |
+
Corvus brachyrhynchos_American Crow
|
16 |
+
Corvus corax_Common Raven
|
17 |
+
Cyanocitta cristata_Blue Jay
|
18 |
+
Cygnus olor_Mute Swan
|
19 |
+
Dryobates pubescens_Downy Woodpecker
|
20 |
+
Dryobates villosus_Hairy Woodpecker
|
21 |
+
Dryocopus pileatus_Pileated Woodpecker
|
22 |
+
Eremophila alpestris_Horned Lark
|
23 |
+
Haemorhous mexicanus_House Finch
|
24 |
+
Haemorhous purpureus_Purple Finch
|
25 |
+
Haliaeetus leucocephalus_Bald Eagle
|
26 |
+
Junco hyemalis_Dark-eyed Junco
|
27 |
+
Larus argentatus_Herring Gull
|
28 |
+
Larus delawarensis_Ring-billed Gull
|
29 |
+
Lophodytes cucullatus_Hooded Merganser
|
30 |
+
Melanerpes carolinus_Red-bellied Woodpecker
|
31 |
+
Meleagris gallopavo_Wild Turkey
|
32 |
+
Melospiza melodia_Song Sparrow
|
33 |
+
Mergus merganser_Common Merganser
|
34 |
+
Mergus serrator_Red-breasted Merganser
|
35 |
+
Passer domesticus_House Sparrow
|
36 |
+
Poecile atricapillus_Black-capped Chickadee
|
37 |
+
Regulus satrapa_Golden-crowned Kinglet
|
38 |
+
Sialia sialis_Eastern Bluebird
|
39 |
+
Sitta canadensis_Red-breasted Nuthatch
|
40 |
+
Sitta carolinensis_White-breasted Nuthatch
|
41 |
+
Spinus pinus_Pine Siskin
|
42 |
+
Spinus tristis_American Goldfinch
|
43 |
+
Spizelloides arborea_American Tree Sparrow
|
44 |
+
Sturnus vulgaris_European Starling
|
45 |
+
Thryothorus ludovicianus_Carolina Wren
|
46 |
+
Turdus migratorius_American Robin
|
47 |
+
Zenaida macroura_Mourning Dove
|
48 |
+
Zonotrichia albicollis_White-throated Sparrow
|
segments.py
ADDED
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Extract segments from audio files based on BirdNET detections.
|
2 |
+
|
3 |
+
Can be used to save the segments of the audio files for each detection.
|
4 |
+
"""
|
5 |
+
import argparse
|
6 |
+
import os
|
7 |
+
from multiprocessing import Pool
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
import audio
|
12 |
+
import config as cfg
|
13 |
+
import utils
|
14 |
+
|
15 |
+
# Seed NumPy's global random generator so the segment shuffling is reproducible
|
16 |
+
np.random.seed(cfg.RANDOM_SEED)
|
17 |
+
|
18 |
+
|
19 |
+
def detectRType(line: str):
    """Detects the type of result file.

    The decision is made from the first line of the file. A leading UTF-8
    byte order mark and leading spaces/tabs are ignored, and the comparison
    is case-insensitive.

    Args:
        line: First line of text.

    Returns:
        Either "table", "r", "kaleidoscope", "csv" or "audacity".
        "audacity" is the fallback when no known header prefix matches.
    """
    # A byte order mark or stray leading whitespace in front of the header
    # would otherwise defeat every prefix check below and the file would be
    # misreported as "audacity".
    header = line.lstrip("\ufeff \t").lower()

    if header.startswith("selection"):
        return "table"

    if header.startswith("filepath"):
        return "r"

    if header.startswith("indir"):
        return "kaleidoscope"

    if header.startswith("start (s)"):
        return "csv"

    # No known header prefix
    return "audacity"
|
38 |
+
|
39 |
+
|
40 |
+
def parseFolders(apath: str, rpath: str, allowed_result_filetypes: list[str] = ["txt", "csv"]) -> list[dict]:
    """Read audio and result files.

    Reads all audio files and BirdNET output inside directory recursively.
    Audio and result files are paired by name: the audio file name without its
    extension has to equal the part of the result file name in front of
    ".BirdNET.".

    Args:
        apath: Path to search for audio files.
        rpath: Path to search for result files.
        allowed_result_filetypes: List of extensions for the result files.
            The list is only read, never modified.

    Returns:
        A list of {"audio": path_to_audio, "result": path_to_result }.
        Audio files without a result file are left out, and result files
        without an audio file are ignored.
    """
    data = {}
    apath = apath.replace("/", os.sep).replace("\\", os.sep)
    rpath = rpath.replace("/", os.sep).replace("\\", os.sep)

    # Get all audio files
    for root, _, files in os.walk(apath):
        for f in files:
            parts = f.rsplit(".", 1)

            if parts[-1].lower() in cfg.ALLOWED_FILETYPES:
                data[parts[0]] = {"audio": os.path.join(root, f), "result": ""}

    # Get all result files
    for root, _, files in os.walk(rpath):
        for f in files:
            if f.rsplit(".", 1)[-1] in allowed_result_filetypes and ".BirdNET." in f:
                key = f.split(".BirdNET.", 1)[0]

                # A result file whose audio file was not found must not abort
                # the whole scan with a KeyError; it is simply skipped.
                if key in data:
                    data[key]["result"] = os.path.join(root, f)

    # Convert to list
    flist = [f for f in data.values() if f["result"]]

    print(f"Found {len(flist)} audio files with valid result file.")

    return flist
|
75 |
+
|
76 |
+
|
77 |
+
def parseFiles(flist: list[dict], max_segments=100):
    """Collects the segments to extract, grouped by audio file.

    The detections of all result files are pooled per species, shuffled and
    cut down to at most `max_segments` per species. The remaining detections
    are then regrouped by the audio file they belong to.

    Args:
        flist: List of dict with {"audio": path_to_audio, "result": path_to_result }.
        max_segments: Number of segments per species.

    Returns:
        A list of (path_to_audio, list_of_segments) tuples, one for each audio
        file that has at least one segment left after limiting.
    """
    by_species: dict[str, list] = {}

    # Pool the detections of every file by species
    for entry in flist:
        for detection in findSegments(entry["audio"], entry["result"]):
            by_species.setdefault(detection["species"], []).append(detection)

    # Keep a random selection of at most max_segments per species
    for name in by_species:
        np.random.shuffle(by_species[name])
        by_species[name] = by_species[name][:max_segments]

    # Regroup what is left by audio file
    by_audio: dict[str, list] = {}
    total = 0

    for pool in by_species.values():
        for detection in pool:
            by_audio.setdefault(detection["audio"], []).append(detection)
            total += 1

    print(f"Found {total} segments in {len(by_audio)} audio files.")

    # dict.items() already yields (key, value) tuples
    return list(by_audio.items())
|
127 |
+
|
128 |
+
|
129 |
+
def findSegments(afile: str, rfile: str):
    """Extracts the segments for an audio file from the results file

    The result format is detected from the first line of the file. Every
    format except "audacity" is treated as having a header in its first line,
    which is skipped.

    Args:
        afile: Path to the audio file.
        rfile: Path to the result file.

    Returns:
        A list of dicts in the form of
        {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
        that only holds detections with a confidence of at least
        cfg.MIN_CONFIDENCE. The list is empty if the result file has no lines.
    """
    segments: list[dict] = []

    # Open and parse result file
    lines = utils.readLines(rfile)

    # Without a first line there is nothing to detect the format from
    if not lines:
        return segments

    # Auto-detect result type
    rtype = detectRType(lines[0])

    # Skip the header explicitly instead of relying on a left-over confidence
    # of 0 being below the threshold, which breaks for a threshold <= 0.
    first_data_line = 0 if rtype == "audacity" else 1

    # Get start and end times based on rtype
    for line in lines[first_data_line:]:
        if rtype == "table":
            d = line.split("\t")
            start = float(d[3])
            end = float(d[4])
            species = d[-2]
            confidence = float(d[-1])

        elif rtype == "audacity":
            d = line.split("\t")
            start = float(d[0])
            end = float(d[1])
            # The label column is split at ", " and the second part is used
            species = d[2].split(", ")[1]
            confidence = float(d[-1])

        elif rtype == "r":
            d = line.split(",")
            start = float(d[1])
            end = float(d[2])
            species = d[4]
            confidence = float(d[5])

        elif rtype == "kaleidoscope":
            d = line.split(",")
            start = float(d[3])
            # Column 4 is added to the start to obtain the end time
            end = float(d[4]) + start
            species = d[5]
            confidence = float(d[7])

        elif rtype == "csv":
            d = line.split(",")
            start = float(d[0])
            end = float(d[1])
            species = d[3]
            confidence = float(d[4])

        else:
            # Unknown result type, nothing to parse
            continue

        # Check if confidence is high enough
        if confidence >= cfg.MIN_CONFIDENCE:
            segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})

    return segments
|
194 |
+
|
195 |
+
|
196 |
+
def extractSegments(item: tuple[tuple[str, list[dict]], float, dict]):
    """Saves each segment separately.

    Creates an audio file for each species segment. The files are written to
    a sub folder of cfg.OUTPUT_PATH that is named after the species.

    Args:
        item: A tuple that contains ((audio file path, segments), segment length, config)

    Returns:
        True if all segments were processed. False if the audio file could not
        be opened or if extracting a segment failed; in the latter case the
        remaining segments of the file are not processed.
    """
    # Paths and config
    (afile, segments), seg_length, config = item

    # Apply the config that was passed along with the work item
    cfg.setConfig(config)

    # Status
    print(f"Extracting segments from {afile}")

    try:
        # Open audio file
        sig, _ = audio.openAudioFile(afile, cfg.SAMPLE_RATE)
    except Exception as ex:
        print(f"Error: Cannot open audio file {afile}", flush=True)
        utils.writeErrorLog(ex)

        # Report the failure the same way as the extraction error path below
        return False

    # Extract segments
    for seg_cnt, seg in enumerate(segments, 1):
        try:
            # Get start and end times in samples
            start = int(seg["start"] * cfg.SAMPLE_RATE)
            end = int(seg["end"] * cfg.SAMPLE_RATE)

            # Widen (or narrow) the detection equally on both sides towards
            # seg_length and clamp the result to the bounds of the signal
            offset = ((seg_length * cfg.SAMPLE_RATE) - (end - start)) // 2
            start = max(0, start - offset)
            end = min(len(sig), end + offset)

            # Make sure segment is not empty
            if end > start:
                # Get segment raw audio from signal
                seg_sig = sig[int(start) : int(end)]

                # Make output path
                outpath = os.path.join(cfg.OUTPUT_PATH, seg["species"])
                os.makedirs(outpath, exist_ok=True)

                # Save segment as <confidence>_<counter>_<audio file name>.wav
                seg_name = "{:.3f}_{}_{}.wav".format(
                    seg["confidence"], seg_cnt, seg["audio"].rsplit(os.sep, 1)[-1].rsplit(".", 1)[0]
                )
                seg_path = os.path.join(outpath, seg_name)
                audio.saveSignal(seg_sig, seg_path)

        except Exception as ex:
            # Write error log
            print(f"Error: Cannot extract segments from {afile}.", flush=True)
            utils.writeErrorLog(ex)
            return False

    return True
|
255 |
+
|
256 |
+
|
257 |
+
if __name__ == "__main__":
    # Command line interface
    parser = argparse.ArgumentParser(description="Extract segments from audio files based on BirdNET detections.")
    parser.add_argument("--audio", default="example/", help="Path to folder containing audio files.")
    parser.add_argument("--results", default="example/", help="Path to folder containing result files.")
    parser.add_argument("--o", default="example/", help="Output folder path for extracted segments.")
    parser.add_argument(
        "--min_conf", type=float, default=0.1, help="Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1."
    )
    parser.add_argument("--max_segments", type=int, default=100, help="Number of randomly extracted segments per species.")
    parser.add_argument(
        "--seg_length", type=float, default=3.0, help="Length of extracted segments in seconds. Defaults to 3.0."
    )
    parser.add_argument("--threads", type=int, default=4, help="Number of CPU threads.")

    args = parser.parse_args()

    # Pair up the audio files with their result files
    cfg.FILE_LIST = parseFolders(args.audio, args.results)

    # Output folder and number of worker processes
    cfg.OUTPUT_PATH = args.o
    cfg.CPU_THREADS = int(args.threads)

    # Confidence threshold, clamped to [0.01, 0.99]
    cfg.MIN_CONFIDENCE = max(0.01, min(0.99, float(args.min_conf)))

    # Replace the file list with the segments to extract per audio file
    cfg.FILE_LIST = parseFiles(cfg.FILE_LIST, max(1, int(args.max_segments)))

    # Segments are never shorter than cfg.SIG_LENGTH
    seg_length = max(cfg.SIG_LENGTH, float(args.seg_length))

    # Add config items to each file list entry.
    # We have to do this for Windows which does not
    # support fork() and thus each process has to
    # have its own config. USE LINUX!
    flist = [(entry, seg_length, cfg.getConfig()) for entry in cfg.FILE_LIST]

    # Extract segments, in parallel if more than one thread was requested
    if cfg.CPU_THREADS >= 2:
        with Pool(cfg.CPU_THREADS) as p:
            p.map(extractSegments, flist)
    else:
        for entry in flist:
            extractSegments(entry)

    # A few examples to test
    # python3 segments.py --audio example/ --results example/ --o example/segments/
    # python3 segments.py --audio example/ --results example/ --o example/segments/ --seg_length 5.0 --min_conf 0.1 --max_segments 100 --threads 4
|