BilalSardar committed on
Commit
e1490d6
1 Parent(s): 81932e3

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
37
+ example/soundscape.wav filter=lfs diff=lfs merge=lfs -text
example/soundscape.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df312b45bc82ce4c638c3e9e09d748702ea14a91ec29e4e8e0676d3e3e015fd7
3
+ size 11520046
example/species_list.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Accipiter cooperii_Cooper's Hawk
2
+ Agelaius phoeniceus_Red-winged Blackbird
3
+ Anas platyrhynchos_Mallard
4
+ Anas rubripes_American Black Duck
5
+ Ardea herodias_Great Blue Heron
6
+ Baeolophus bicolor_Tufted Titmouse
7
+ Branta canadensis_Canada Goose
8
+ Bucephala albeola_Bufflehead
9
+ Bucephala clangula_Common Goldeneye
10
+ Buteo jamaicensis_Red-tailed Hawk
11
+ Cardinalis cardinalis_Northern Cardinal
12
+ Certhia americana_Brown Creeper
13
+ Colaptes auratus_Northern Flicker
14
+ Columba livia_Rock Pigeon
15
+ Corvus brachyrhynchos_American Crow
16
+ Corvus corax_Common Raven
17
+ Cyanocitta cristata_Blue Jay
18
+ Cygnus olor_Mute Swan
19
+ Dryobates pubescens_Downy Woodpecker
20
+ Dryobates villosus_Hairy Woodpecker
21
+ Dryocopus pileatus_Pileated Woodpecker
22
+ Eremophila alpestris_Horned Lark
23
+ Haemorhous mexicanus_House Finch
24
+ Haemorhous purpureus_Purple Finch
25
+ Haliaeetus leucocephalus_Bald Eagle
26
+ Junco hyemalis_Dark-eyed Junco
27
+ Larus argentatus_Herring Gull
28
+ Larus delawarensis_Ring-billed Gull
29
+ Lophodytes cucullatus_Hooded Merganser
30
+ Melanerpes carolinus_Red-bellied Woodpecker
31
+ Meleagris gallopavo_Wild Turkey
32
+ Melospiza melodia_Song Sparrow
33
+ Mergus merganser_Common Merganser
34
+ Mergus serrator_Red-breasted Merganser
35
+ Passer domesticus_House Sparrow
36
+ Poecile atricapillus_Black-capped Chickadee
37
+ Regulus satrapa_Golden-crowned Kinglet
38
+ Sialia sialis_Eastern Bluebird
39
+ Sitta canadensis_Red-breasted Nuthatch
40
+ Sitta carolinensis_White-breasted Nuthatch
41
+ Spinus pinus_Pine Siskin
42
+ Spinus tristis_American Goldfinch
43
+ Spizelloides arborea_American Tree Sparrow
44
+ Sturnus vulgaris_European Starling
45
+ Thryothorus ludovicianus_Carolina Wren
46
+ Turdus migratorius_American Robin
47
+ Zenaida macroura_Mourning Dove
48
+ Zonotrichia albicollis_White-throated Sparrow
segments.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Extract segments from audio files based on BirdNET detections.
2
+
3
+ Can be used to save the segments of the audio files for each detection.
4
+ """
5
+ import argparse
6
+ import os
7
+ from multiprocessing import Pool
8
+
9
+ import numpy as np
10
+
11
+ import audio
12
+ import config as cfg
13
+ import utils
14
+
15
+ # Set numpy random seed
16
+ np.random.seed(cfg.RANDOM_SEED)
17
+
18
+
19
def detectRType(line: str):
    """Classifies a result file by the prefix of its first line.

    The comparison is case-insensitive.

    Args:
        line: First line of text.

    Returns:
        Either "table", "r", "kaleidoscope", "csv" or "audacity".
        "audacity" is the fallback when no known prefix matches.
    """
    header = line.lower()

    # Checked in order; the first matching prefix wins.
    known_prefixes = (
        ("selection", "table"),
        ("filepath", "r"),
        ("indir", "kaleidoscope"),
        ("start (s)", "csv"),
    )

    for prefix, rtype in known_prefixes:
        if header.startswith(prefix):
            return rtype

    return "audacity"
38
+
39
+
40
def parseFolders(apath: str, rpath: str, allowed_result_filetypes: list[str] = ["txt", "csv"]) -> list[dict]:
    """Read audio and result files.

    Walks ``apath`` recursively for audio files and ``rpath`` recursively for
    BirdNET result files, then pairs them by name: a result file named
    ``<name>.BirdNET.<rest>`` is assigned to the audio file ``<name>.<ext>``.

    Args:
        apath: Path to search for audio files.
        rpath: Path to search for result files.
        allowed_result_filetypes: List of extensions for the result files.
            Compared case-insensitively. The default list is only read here,
            never modified.

    Returns:
        A list of {"audio": path_to_audio, "result": path_to_result },
        restricted to audio files for which a result file was found.
    """
    data = {}
    apath = apath.replace("/", os.sep).replace("\\", os.sep)
    rpath = rpath.replace("/", os.sep).replace("\\", os.sep)
    allowed_results = {ext.lower() for ext in allowed_result_filetypes}

    # Get all audio files, keyed by file name without extension
    for root, _, files in os.walk(apath):
        for f in files:
            name_parts = f.rsplit(".", 1)

            if name_parts[-1].lower() in cfg.ALLOWED_FILETYPES:
                data[name_parts[0]] = {"audio": os.path.join(root, f), "result": ""}

    # Get all result files
    for root, _, files in os.walk(rpath):
        for f in files:
            if ".BirdNET." not in f or f.rsplit(".", 1)[-1].lower() not in allowed_results:
                continue

            entry = data.get(f.split(".BirdNET.", 1)[0])

            # A result file without a matching audio file cannot be used;
            # skip it instead of failing with a KeyError.
            if entry is not None:
                entry["result"] = os.path.join(root, f)

    # Convert to list, dropping audio files without a result file
    flist = [f for f in data.values() if f["result"]]

    print(f"Found {len(flist)} audio files with valid result file.")

    return flist
75
+
76
+
77
def parseFiles(flist: list[dict], max_segments=100):
    """Collects the detections of all files and selects a random subset per species.

    Args:
        flist: List of dict with {"audio": path_to_audio, "result": path_to_result }.
        max_segments: Number of segments per species.

    Returns:
        A list of (audio file path, list of segment dicts) tuples, one for
        each audio file that has at least one selected segment.
    """
    by_species: dict[str, list] = {}

    # Gather the segments of every result file, grouped by species
    for entry in flist:
        audio_path = entry["audio"]
        result_path = entry["result"]

        for detection in findSegments(audio_path, result_path):
            by_species.setdefault(detection["species"], []).append(detection)

    # Randomize each species' segments and keep at most max_segments of them
    for name in by_species:
        candidates = by_species[name]
        np.random.shuffle(candidates)
        by_species[name] = candidates[:max_segments]

    # Regroup the selected segments by the audio file they come from
    by_audio: dict[str, list] = {}
    total = 0

    for selected in by_species.values():
        for seg in selected:
            by_audio.setdefault(seg["audio"], []).append(seg)
            total += 1

    print(f"Found {total} segments in {len(by_audio)} audio files.")

    # Convert to list of (audio path, segments) tuples
    return list(by_audio.items())
127
+
128
+
129
def findSegments(afile: str, rfile: str):
    """Extracts the segments for an audio file from the results file.

    The result format is auto-detected from the first line of the file.
    Header rows and blank lines are skipped; an empty result file yields
    no segments.

    Args:
        afile: Path to the audio file.
        rfile: Path to the result file.

    Returns:
        A list of dicts in the form of
        {"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
        for every detection whose confidence is at least cfg.MIN_CONFIDENCE.
    """
    segments: list[dict] = []

    # Open and parse result file
    lines = utils.readLines(rfile)

    # Nothing to detect or parse in an empty result file
    if not lines:
        return segments

    # Auto-detect result type
    rtype = detectRType(lines[0])

    # Every format except "audacity" starts with a header row, which
    # carries no detection and must not reach the confidence check.
    first_data_line = 0 if rtype == "audacity" else 1

    for line in lines[first_data_line:]:
        # Blank lines (e.g. a trailing newline) carry no detection
        if not line.strip():
            continue

        # Get start and end times based on rtype
        if rtype == "table":
            d = line.split("\t")
            start = float(d[3])
            end = float(d[4])
            species = d[-2]
            confidence = float(d[-1])

        elif rtype == "audacity":
            d = line.split("\t")
            start = float(d[0])
            end = float(d[1])
            # The label column holds two ", "-separated parts; the second is used
            species = d[2].split(", ")[1]
            confidence = float(d[-1])

        elif rtype == "r":
            d = line.split(",")
            start = float(d[1])
            end = float(d[2])
            species = d[4]
            confidence = float(d[5])

        elif rtype == "kaleidoscope":
            d = line.split(",")
            start = float(d[3])
            # Column 4 is added to the start time (presumably a duration - TODO confirm)
            end = float(d[4]) + start
            species = d[5]
            confidence = float(d[7])

        elif rtype == "csv":
            d = line.split(",")
            start = float(d[0])
            end = float(d[1])
            species = d[3]
            confidence = float(d[4])

        else:
            continue

        # Check if confidence is high enough
        if confidence >= cfg.MIN_CONFIDENCE:
            segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})

    return segments
194
+
195
+
196
def extractSegments(item: tuple[tuple[str, list[dict]], float, dict]):
    """Saves each segment separately.

    Creates an audio file for each species segment. Files are written to
    ``cfg.OUTPUT_PATH/<species>/<confidence>_<index>_<audio name>.wav``.

    Args:
        item: A tuple that contains ((audio file path, segments), segment length, config)

    Returns:
        True if all segments were processed, False if the audio file could
        not be opened or a segment could not be extracted. Processing stops
        at the first segment that fails.
    """
    # Paths and config
    afile = item[0][0]
    segments = item[0][1]
    seg_length = item[1]
    # Apply the passed config in this process (worker processes may not share the parent's config)
    cfg.setConfig(item[2])

    # Status
    print(f"Extracting segments from {afile}")

    try:
        # Open audio file
        sig, _ = audio.openAudioFile(afile, cfg.SAMPLE_RATE)
    except Exception as ex:
        print(f"Error: Cannot open audio file {afile}", flush=True)
        utils.writeErrorLog(ex)

        # Report failure the same way as a failed extraction below
        return False

    # Extract segments
    for seg_cnt, seg in enumerate(segments, 1):
        try:
            # Get start and end times in samples
            start = int(seg["start"] * cfg.SAMPLE_RATE)
            end = int(seg["end"] * cfg.SAMPLE_RATE)

            # Grow (or shrink, if negative) the detection symmetrically to
            # seg_length, clipped to the bounds of the signal
            offset = int(((seg_length * cfg.SAMPLE_RATE) - (end - start)) // 2)
            start = max(0, start - offset)
            end = min(len(sig), end + offset)

            # Make sure segment is long enough
            if end > start:
                # Get segment raw audio from signal
                seg_sig = sig[start:end]

                # Make output path
                outpath = os.path.join(cfg.OUTPUT_PATH, seg["species"])
                os.makedirs(outpath, exist_ok=True)

                # Save segment
                audio_name = seg["audio"].rsplit(os.sep, 1)[-1].rsplit(".", 1)[0]
                seg_name = "{:.3f}_{}_{}.wav".format(seg["confidence"], seg_cnt, audio_name)
                seg_path = os.path.join(outpath, seg_name)
                audio.saveSignal(seg_sig, seg_path)

        except Exception as ex:
            # Write error log
            print(f"Error: Cannot extract segments from {afile}.", flush=True)
            utils.writeErrorLog(ex)
            return False

    return True
255
+
256
+
257
if __name__ == "__main__":
    # Command line interface
    parser = argparse.ArgumentParser(description="Extract segments from audio files based on BirdNET detections.")

    # Folder arguments all default to the bundled example directory
    for flag, help_text in (
        ("--audio", "Path to folder containing audio files."),
        ("--results", "Path to folder containing result files."),
        ("--o", "Output folder path for extracted segments."),
    ):
        parser.add_argument(flag, default="example/", help=help_text)

    parser.add_argument(
        "--min_conf",
        type=float,
        default=0.1,
        help="Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1.",
    )
    parser.add_argument(
        "--max_segments",
        type=int,
        default=100,
        help="Number of randomly extracted segments per species.",
    )
    parser.add_argument(
        "--seg_length",
        type=float,
        default=3.0,
        help="Length of extracted segments in seconds. Defaults to 3.0.",
    )
    parser.add_argument("--threads", type=int, default=4, help="Number of CPU threads.")

    args = parser.parse_args()

    # Pair audio files with their result files
    cfg.FILE_LIST = parseFolders(args.audio, args.results)

    # Output folder and number of worker processes
    cfg.OUTPUT_PATH = args.o
    cfg.CPU_THREADS = int(args.threads)

    # Confidence threshold, clamped to [0.01, 0.99]
    requested_conf = float(args.min_conf)
    cfg.MIN_CONFIDENCE = max(0.01, min(0.99, requested_conf))

    # Turn the file list into per-audio-file segment lists (at least one segment per species)
    segments_per_species = max(1, int(args.max_segments))
    cfg.FILE_LIST = parseFiles(cfg.FILE_LIST, segments_per_species)

    # Every work item carries the segment length and a copy of the config:
    # on Windows there is no fork(), so each worker process has to be
    # handed its own config explicitly.
    seg_length = max(cfg.SIG_LENGTH, float(args.seg_length))
    flist = [(entry, seg_length, cfg.getConfig()) for entry in cfg.FILE_LIST]

    # Extract segments, in parallel when two or more threads are requested
    if cfg.CPU_THREADS >= 2:
        with Pool(cfg.CPU_THREADS) as p:
            p.map(extractSegments, flist)
    else:
        for entry in flist:
            extractSegments(entry)
302
+
303
+ # A few examples to test
304
+ # python3 segments.py --audio example/ --results example/ --o example/segments/
305
+ # python3 segments.py --audio example/ --results example/ --o example/segments/ --seg_length 5.0 --min_conf 0.1 --max_segments 100 --threads 4