BilalSardar committed on
Commit
6b3d53a
1 Parent(s): d154e8d

Upload 2 files

Browse files
Files changed (2) hide show
  1. analyze.py +535 -0
  2. species.py +98 -0
analyze.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module to analyze audio samples.
2
+ """
3
+ import argparse
4
+ import datetime
5
+ import json
6
+ import operator
7
+ import os
8
+ import sys
9
+ from multiprocessing import Pool, freeze_support
10
+
11
+ import numpy as np
12
+
13
+ import audio
14
+ import config as cfg
15
+ import model
16
+ import species
17
+ import utils
18
+
19
+
20
def loadCodes():
    """Loads the eBird codes.

    Reads the JSON file referenced by ``cfg.CODES_FILE``.

    Returns:
        A dictionary containing the eBird codes.
    """
    # JSON interchange files are UTF-8; pin the encoding so decoding does not
    # depend on the platform's locale default (e.g. cp1252 on Windows).
    with open(cfg.CODES_FILE, "r", encoding="utf-8") as cfile:
        return json.load(cfile)
30
+
31
+
32
def _iterDetections(r: dict[str, list]):
    """Yields every prediction that passes the confidence and species filters.

    Segments are visited in ascending start-time order and predictions in the
    order they are stored for each segment.

    Args:
        r: The dictionary with {segment: scores}.

    Yields:
        Tuples of (timestamp, start, end, species, confidence, translated label).
    """
    for timestamp in getSortedTimestamps(r):
        # Split once per segment instead of once per prediction.
        start, _, end = timestamp.partition("-")

        for c in r[timestamp]:
            if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
                label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
                yield timestamp, start, end, c[0], c[1], label


def saveResultFile(r: dict[str, list], path: str, afile_path: str):
    """Saves the results to the hard drive.

    The output format is selected by ``cfg.RESULT_TYPE`` ("table", "audacity",
    "r", "kaleidoscope"; anything else falls back to plain CSV).

    Args:
        r: The dictionary with {segment: scores}.
        path: The path where the result should be saved.
        afile_path: The path to audio file.
    """
    # Make folder if it doesn't exist
    out_dir = os.path.dirname(path)

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Collect the output pieces and join them once at the end
    parts = []

    if cfg.RESULT_TYPE == "table":
        # Raven selection header
        parts.append(
            "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies Code\tCommon Name\tConfidence\n"
        )

        # Upper frequency bound: half the native sample rate, capped at SIG_FMAX
        high_freq = audio.get_sample_rate(afile_path) / 2

        if high_freq > cfg.SIG_FMAX:
            high_freq = cfg.SIG_FMAX

        # Selection ids are 1-based and count only the written rows
        for selection_id, (_, start, end, species_name, score, label) in enumerate(_iterDetections(r), start=1):
            parts.append(
                "{}\tSpectrogram 1\t1\t{}\t{}\t{}\t{}\t{}\t{}\t{:.4f}\n".format(
                    selection_id,
                    start,
                    end,
                    cfg.SIG_FMIN,
                    high_freq,
                    cfg.CODES[species_name] if species_name in cfg.CODES else species_name,
                    label.split("_", 1)[-1],
                    score,
                )
            )

    elif cfg.RESULT_TYPE == "audacity":
        # Audacity timeline labels
        for timestamp, _, _, _, score, label in _iterDetections(r):
            parts.append("{}\t{}\t{:.4f}\n".format(timestamp.replace("-", "\t"), label.replace("_", ", "), score))

    elif cfg.RESULT_TYPE == "r":
        # Output format for R (rows carry the leading newline, header has none)
        parts.append(
            "filepath,start,end,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity,min_conf,species_list,model"
        )
        sensitivity = (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0
        model_name = os.path.basename(cfg.MODEL_PATH)

        for _, start, end, _, score, label in _iterDetections(r):
            parts.append(
                "\n{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{},{},{},{}".format(
                    afile_path,
                    start,
                    end,
                    label.split("_", 1)[0],
                    label.split("_", 1)[-1],
                    score,
                    cfg.LATITUDE,
                    cfg.LONGITUDE,
                    cfg.WEEK,
                    cfg.SIG_OVERLAP,
                    sensitivity,
                    cfg.MIN_CONFIDENCE,
                    cfg.SPECIES_LIST_FILE,
                    model_name,
                )
            )

    elif cfg.RESULT_TYPE == "kaleidoscope":
        # Output format for kaleidoscope
        parts.append(
            "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity"
        )

        folder_path, filename = os.path.split(afile_path)
        parent_folder, folder_name = os.path.split(folder_path)
        parent_folder = parent_folder.rstrip("/")
        sensitivity = (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0

        for _, start, end, _, score, label in _iterDetections(r):
            parts.append(
                "\n{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}".format(
                    parent_folder,
                    folder_name,
                    filename,
                    start,
                    float(end) - float(start),
                    label.split("_", 1)[0],
                    label.split("_", 1)[-1],
                    score,
                    cfg.LATITUDE,
                    cfg.LONGITUDE,
                    cfg.WEEK,
                    cfg.SIG_OVERLAP,
                    sensitivity,
                )
            )

    else:
        # CSV output file
        parts.append("Start (s),End (s),Scientific name,Common name,Confidence\n")

        for _, start, end, _, score, label in _iterDetections(r):
            parts.append(
                "{},{},{},{},{:.4f}\n".format(start, end, label.split("_", 1)[0], label.split("_", 1)[-1], score)
            )

    # Save as file
    with open(path, "w", encoding="utf-8") as rfile:
        rfile.write("".join(parts))
186
+
187
+
188
def getSortedTimestamps(results: dict[str, list]):
    """Orders the segment keys of a result dictionary by start time.

    Each key has the form "<start>-<end>"; the part before the first "-" is
    parsed as a float and used as the sort key.

    Args:
        results: The dictionary with {segment: scores}.

    Returns:
        A list of the segment keys in ascending order of start time.
        The scores themselves are not part of the returned list.
    """

    def segment_start(segment: str) -> float:
        # Everything before the first dash is the start time of the segment
        return float(segment.partition("-")[0])

    ordered = list(results)
    ordered.sort(key=segment_start)

    return ordered
198
+
199
+
200
def getRawAudioFromFile(fpath: str):
    """Loads an audio file and cuts it into chunks.

    The file is opened via ``audio.openAudioFile`` with ``cfg.SAMPLE_RATE`` and
    the resulting signal is handed to ``audio.splitSignal`` together with the
    configured segment length, overlap and minimum length.

    Args:
        fpath: Path to the audio file.

    Returns:
        The signal split into a list of chunks.
    """
    signal, sample_rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE)

    return audio.splitSignal(signal, sample_rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
218
+
219
+
220
def predict(samples):
    """Runs the model on a batch of samples.

    Args:
        samples: Samples to be predicted.

    Returns:
        The prediction scores. When ``cfg.APPLY_SIGMOID`` is set, the raw model
        output is passed through ``model.flat_sigmoid`` with the negated
        ``cfg.SIGMOID_SENSITIVITY``; otherwise the raw output is returned.
    """
    # Convert the batch to float32 before handing it to the model
    batch = np.array(samples, dtype="float32")
    scores = model.predict(batch)

    # Raw model output requested: nothing more to do
    if not cfg.APPLY_SIGMOID:
        return scores

    return model.flat_sigmoid(np.array(scores), sensitivity=-cfg.SIGMOID_SENSITIVITY)
238
+
239
+
240
def analyzeFile(item):
    """Analyzes a file.

    Predicts the scores for the file and saves the results.

    Args:
        item: Tuple containing (file path, config)

    Returns:
        `True` if the file was analyzed and saved successfully, `False` if
        opening, analyzing or saving failed (the error is written to the log).
    """
    # Get file path and restore cfg
    fpath: str = item[0]
    cfg.setConfig(item[1])

    # Start time
    start_time = datetime.datetime.now()

    # Status
    print(f"Analyzing {fpath}", flush=True)

    try:
        # Open audio file and split into chunks
        chunks = getRawAudioFromFile(fpath)

    # If the file cannot be read, show error and skip
    except Exception as ex:
        print(f"Error: Cannot open audio file {fpath}", flush=True)
        utils.writeErrorLog(ex)

        return False

    # Process each chunk
    try:
        start, end = 0, cfg.SIG_LENGTH
        results = {}
        samples = []
        timestamps = []
        last_index = len(chunks) - 1

        for chunk_index, chunk in enumerate(chunks):
            # Add to batch
            samples.append(chunk)
            timestamps.append([start, end])

            # Advance start and end
            start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
            end = start + cfg.SIG_LENGTH

            # Keep collecting until the batch is full or this is the last chunk
            if len(samples) < cfg.BATCH_SIZE and chunk_index < last_index:
                continue

            # Predict
            p = predict(samples)

            # Add to results
            for i, (s_start, s_end) in enumerate(timestamps):
                # Assign scores to labels and sort by score (highest first)
                p_sorted = sorted(zip(cfg.LABELS, p[i]), key=operator.itemgetter(1), reverse=True)

                # Store the complete sorted list; filtering happens when saving
                results[str(s_start) + "-" + str(s_end)] = p_sorted

            # Clear batch
            samples = []
            timestamps = []

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
        utils.writeErrorLog(ex)

        return False

    # Save as selection table
    try:
        # We have to check if output path is a file or directory
        if cfg.OUTPUT_PATH.rsplit(".", 1)[-1].lower() not in ["txt", "csv"]:
            # Strip the input path only where it is a prefix; str.replace()
            # would also remove later occurrences inside the path.
            if cfg.INPUT_PATH and fpath.startswith(cfg.INPUT_PATH):
                rpath = fpath[len(cfg.INPUT_PATH) :]
            else:
                rpath = fpath

            # Drop one leading path separator (slicing is safe on empty strings)
            if rpath[:1] in ("/", "\\"):
                rpath = rpath[1:]

            # Single input file with a directory as output: nothing is left
            # after stripping, so fall back to the file name.
            if not rpath:
                rpath = os.path.basename(fpath)

            # Make target directory if it doesn't exist
            rdir = os.path.join(cfg.OUTPUT_PATH, os.path.dirname(rpath))

            os.makedirs(rdir, exist_ok=True)

            if cfg.RESULT_TYPE == "table":
                rtype = ".BirdNET.selection.table.txt"
            elif cfg.RESULT_TYPE == "audacity":
                rtype = ".BirdNET.results.txt"
            else:
                rtype = ".BirdNET.results.csv"

            saveResultFile(results, os.path.join(cfg.OUTPUT_PATH, rpath.rsplit(".", 1)[0] + rtype), fpath)
        else:
            saveResultFile(results, cfg.OUTPUT_PATH, fpath)

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot save result for {fpath}.\n", flush=True)
        utils.writeErrorLog(ex)

        return False

    delta_time = (datetime.datetime.now() - start_time).total_seconds()
    print("Finished {} in {:.2f} seconds".format(fpath, delta_time), flush=True)

    return True
357
+
358
+
359
if __name__ == "__main__":
    # Command line entry point: parse the arguments, populate the shared
    # config module and analyze every input file.

    # Freeze support for executable
    freeze_support()

    # Parse arguments
    parser = argparse.ArgumentParser(description="Analyze audio files with BirdNET")
    parser.add_argument(
        "--i", default="example/", help="Path to input file or folder. If this is a file, --o needs to be a file too."
    )
    parser.add_argument(
        "--o", default="example/", help="Path to output file or folder. If this is a file, --i needs to be a file too."
    )
    parser.add_argument("--lat", type=float, default=-1, help="Recording location latitude. Set -1 to ignore.")
    parser.add_argument("--lon", type=float, default=-1, help="Recording location longitude. Set -1 to ignore.")
    parser.add_argument(
        "--week",
        type=int,
        default=-1,
        help="Week of the year when the recording was made. Values in [1, 48] (4 weeks per month). Set -1 for year-round species list.",
    )
    parser.add_argument(
        "--slist",
        default="",
        help='Path to species list file or folder. If folder is provided, species list needs to be named "species_list.txt". If lat and lon are provided, this list will be ignored.',
    )
    parser.add_argument(
        "--sensitivity",
        type=float,
        default=1.0,
        help="Detection sensitivity; Higher values result in higher sensitivity. Values in [0.5, 1.5]. Defaults to 1.0.",
    )
    parser.add_argument(
        "--min_conf", type=float, default=0.1, help="Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1."
    )
    parser.add_argument(
        "--overlap", type=float, default=0.0, help="Overlap of prediction segments. Values in [0.0, 2.9]. Defaults to 0.0."
    )
    parser.add_argument(
        "--rtype",
        default="table",
        help="Specifies output format. Values in ['table', 'audacity', 'r', 'kaleidoscope', 'csv']. Defaults to 'table' (Raven selection table).",
    )
    parser.add_argument("--threads", type=int, default=4, help="Number of CPU threads.")
    parser.add_argument(
        "--batchsize", type=int, default=1, help="Number of samples to process at the same time. Defaults to 1."
    )
    parser.add_argument(
        "--locale",
        default="en",
        help="Locale for translated species common names. Values in ['af', 'de', 'it', ...] Defaults to 'en'.",
    )
    parser.add_argument(
        "--sf_thresh",
        type=float,
        default=0.03,
        help="Minimum species occurrence frequency threshold for location filter. Values in [0.01, 0.99]. Defaults to 0.03.",
    )
    parser.add_argument(
        "--classifier",
        default=None,
        help="Path to custom trained classifier. Defaults to None. If set, --lat, --lon and --locale are ignored.",
    )

    args = parser.parse_args()

    # Set paths relative to script path (requested in #3)
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    cfg.MODEL_PATH = os.path.join(script_dir, cfg.MODEL_PATH)
    cfg.LABELS_FILE = os.path.join(script_dir, cfg.LABELS_FILE)
    cfg.TRANSLATED_LABELS_PATH = os.path.join(script_dir, cfg.TRANSLATED_LABELS_PATH)
    cfg.MDATA_MODEL_PATH = os.path.join(script_dir, cfg.MDATA_MODEL_PATH)
    cfg.CODES_FILE = os.path.join(script_dir, cfg.CODES_FILE)
    cfg.ERROR_LOG_FILE = os.path.join(script_dir, cfg.ERROR_LOG_FILE)

    # Load eBird codes
    cfg.CODES = loadCodes()

    # Set custom classifier?
    if args.classifier is not None:
        cfg.CUSTOM_CLASSIFIER = args.classifier  # we treat this as absolute path, so no need to join with dirname
        cfg.LABELS_FILE = args.classifier.replace(".tflite", "_Labels.txt")  # same for labels file
        # Location filter and translations are ignored for a custom classifier
        args.lat = -1
        args.lon = -1
        args.locale = "en"

    # Load labels once, after the final labels file is known. The default
    # labels file is no longer read (and required) when a custom classifier
    # replaces it anyway.
    cfg.LABELS = utils.readLines(cfg.LABELS_FILE)

    # Load translated labels
    lfile = os.path.join(
        cfg.TRANSLATED_LABELS_PATH, os.path.basename(cfg.LABELS_FILE).replace(".txt", "_{}.txt".format(args.locale))
    )

    if args.locale not in ["en"] and os.path.isfile(lfile):
        cfg.TRANSLATED_LABELS = utils.readLines(lfile)
    else:
        # English, or no translation file available: use the plain labels
        cfg.TRANSLATED_LABELS = cfg.LABELS

    ### Make sure to comment out appropriately if you are not using args. ###

    # Load species list from location filter or provided list
    cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK = args.lat, args.lon, args.week
    cfg.LOCATION_FILTER_THRESHOLD = max(0.01, min(0.99, float(args.sf_thresh)))

    if cfg.LATITUDE == -1 and cfg.LONGITUDE == -1:
        # No location given: use the custom species list, if any
        if not args.slist:
            cfg.SPECIES_LIST_FILE = None
        else:
            cfg.SPECIES_LIST_FILE = os.path.join(script_dir, args.slist)

            if os.path.isdir(cfg.SPECIES_LIST_FILE):
                cfg.SPECIES_LIST_FILE = os.path.join(cfg.SPECIES_LIST_FILE, "species_list.txt")

        cfg.SPECIES_LIST = utils.readLines(cfg.SPECIES_LIST_FILE)
    else:
        # Location given: the species list is predicted and --slist is ignored
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = species.getSpeciesList(cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK, cfg.LOCATION_FILTER_THRESHOLD)

    # An empty species list means "no filter", i.e. all labels are eligible
    if not cfg.SPECIES_LIST:
        print(f"Species list contains {len(cfg.LABELS)} species")
    else:
        print(f"Species list contains {len(cfg.SPECIES_LIST)} species")

    # Set input and output path
    cfg.INPUT_PATH = args.i
    cfg.OUTPUT_PATH = args.o

    # Parse input files
    input_is_dir = os.path.isdir(cfg.INPUT_PATH)

    if input_is_dir:
        cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
        print(f"Found {len(cfg.FILE_LIST)} files to analyze")
    else:
        cfg.FILE_LIST = [cfg.INPUT_PATH]

    # Set confidence threshold
    cfg.MIN_CONFIDENCE = max(0.01, min(0.99, float(args.min_conf)))

    # Set sensitivity (the config value is mirrored around 1.0, then clamped to [0.5, 1.5])
    cfg.SIGMOID_SENSITIVITY = max(0.5, min(1.0 - (float(args.sensitivity) - 1.0), 1.5))

    # Set overlap
    cfg.SIG_OVERLAP = max(0.0, min(2.9, float(args.overlap)))

    # Set result type; unknown values fall back to the Raven selection table
    cfg.RESULT_TYPE = args.rtype.lower()

    if cfg.RESULT_TYPE not in ["table", "audacity", "r", "kaleidoscope", "csv"]:
        cfg.RESULT_TYPE = "table"

    # Set number of threads: a folder is parallelized across files,
    # a single file gets all threads for TFLite.
    if input_is_dir:
        cfg.CPU_THREADS = max(1, int(args.threads))
        cfg.TFLITE_THREADS = 1
    else:
        cfg.CPU_THREADS = 1
        cfg.TFLITE_THREADS = max(1, int(args.threads))

    # Set batch size
    cfg.BATCH_SIZE = max(1, int(args.batchsize))

    # Add config items to each file list entry.
    # We have to do this for Windows which does not
    # support fork() and thus each process has to
    # have its own config. USE LINUX!
    flist = [(f, cfg.getConfig()) for f in cfg.FILE_LIST]

    # Analyze files
    if cfg.CPU_THREADS < 2:
        for entry in flist:
            analyzeFile(entry)
    else:
        with Pool(cfg.CPU_THREADS) as p:
            p.map(analyzeFile, flist)
531
+
532
+ # A few examples to test
533
+ # python3 analyze.py --i example/ --o example/ --slist example/ --min_conf 0.5 --threads 4
534
+ # python3 analyze.py --i example/soundscape.wav --o example/soundscape.BirdNET.selection.table.txt --slist example/species_list.txt --threads 8
535
+ # python3 analyze.py --i example/ --o example/ --lat 42.5 --lon -76.45 --week 4 --sensitivity 1.0 --rtype table --locale de
species.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module for predicting a species list.
2
+
3
+ Can be used to predict a species list using coordinates and weeks.
4
+ """
5
+ import argparse
6
+ import os
7
+ import sys
8
+
9
+ import config as cfg
10
+ import model
11
+ import utils
12
+
13
+
14
def getSpeciesList(lat: float, lon: float, week: int, threshold=0.05, sort=False) -> list[str]:
    """Build the species list for a location and week.

    Queries ``model.explore`` for the given coordinates and week and keeps the
    entries that reach the threshold.

    Args:
        lat: The latitude.
        lon: The longitude.
        week: The week of the year [1-48]. Use -1 for year-round.
        threshold: Entries whose first element is below this value are dropped.
        sort: If `True`, the resulting list is sorted; otherwise the order
            delivered by the model is kept.

    Returns:
        A list of all eligible species.
    """
    # NOTE(review): entries are presumably (score, label) pairs - confirm against model.explore
    predictions = model.explore(lat, lon, week)

    eligible = []

    for entry in predictions:
        if entry[0] >= threshold:
            eligible.append(entry[1])

    if sort:
        return sorted(eligible)

    return eligible
36
+
37
+
38
if __name__ == "__main__":
    # Command line entry point: predict the species list for a location
    # and write it to a text file, one species per line.

    # Parse arguments
    parser = argparse.ArgumentParser(
        description="Get list of species for a given location with BirdNET. Sorted by occurrence frequency."
    )
    parser.add_argument(
        "--o",
        default="example/",
        help="Path to output file or folder. If this is a folder, file will be named 'species_list.txt'.",
    )
    parser.add_argument("--lat", type=float, help="Recording location latitude.")
    parser.add_argument("--lon", type=float, help="Recording location longitude.")
    parser.add_argument(
        "--week",
        type=int,
        default=-1,
        help="Week of the year when the recording was made. Values in [1, 48] (4 weeks per month). Set -1 for year-round species list.",
    )
    parser.add_argument("--threshold", type=float, default=0.05, help="Occurrence frequency threshold. Defaults to 0.05.")
    parser.add_argument(
        "--sortby",
        default="freq",
        help="Sort species by occurrence frequency or alphabetically. Values in ['freq', 'alpha']. Defaults to 'freq'.",
    )

    args = parser.parse_args()

    # Set paths relative to script path (requested in #3)
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    cfg.LABELS_FILE = os.path.join(script_dir, cfg.LABELS_FILE)
    cfg.MDATA_MODEL_PATH = os.path.join(script_dir, cfg.MDATA_MODEL_PATH)

    # Load labels
    cfg.LABELS = utils.readLines(cfg.LABELS_FILE)

    # Set output path; an existing folder gets the default file name
    out_path = args.o

    if os.path.isdir(out_path):
        out_path = os.path.join(out_path, "species_list.txt")

    cfg.OUTPUT_PATH = out_path

    # Set config
    cfg.LATITUDE = args.lat
    cfg.LONGITUDE = args.lon
    cfg.WEEK = args.week
    cfg.LOCATION_FILTER_THRESHOLD = args.threshold

    print(f"Getting species list for {cfg.LATITUDE}/{cfg.LONGITUDE}, Week {cfg.WEEK}...", end="", flush=True)

    # Any value other than "freq" requests a sorted list
    sort_list = args.sortby != "freq"

    # Get species list
    species_list = getSpeciesList(cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK, cfg.LOCATION_FILTER_THRESHOLD, sort_list)

    print(f"Done. {len(species_list)} species on list.", flush=True)

    # Save species list, one entry per line
    with open(cfg.OUTPUT_PATH, "w") as f:
        f.writelines(s + "\n" for s in species_list)
95
+
96
+ # A few examples to test
97
+ # python3 species.py --o example/ --lat 42.5 --lon -76.45 --week -1
98
+ # python3 species.py --o example/species_list.txt --lat 42.5 --lon -76.45 --week 4 --threshold 0.05 --sortby alpha