|
|
|
import os |
|
import csv |
|
import soundfile as sf |
|
import numpy as np |
|
import json |
|
import time |
|
import gradio |
|
import torchaudio |
|
from torchvision.utils import save_image |
|
from pathlib import Path |
|
import sys |
|
import pandas as pd |
|
from PIL import Image |
|
|
|
from ia_model_utils import * |
|
|
|
|
|
DATADIR = os.getcwd() |
|
model_path = os.path.join(DATADIR, "ia_data/") |
|
|
|
|
|
|
|
|
|
from ia_model_utils import * |
|
|
|
|
|
os.chdir(DATADIR) |
|
|
|
|
|
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") |
|
bird_call_detection = load_model(model_path, post_nms_topN_eval=50, device=device) |
|
|
|
|
|
EXAMPLES_PATH = Path(".") |
|
|
|
|
|
example_files = [] |
|
fname_examples = example_files |
|
|
|
print("Voici la liste des fichiers disponibles dans l'environement") |
|
print(fname_examples) |
|
|
|
|
|
interface_options = { |
|
"title": "NBM Classification", |
|
"description": "Online NBM classifier: Please upload a wavfile", |
|
"article": "Online NBM classifier: Please upload a wavfile", |
|
|
|
"examples": fname_examples, |
|
"allow_flagging": "never", |
|
} |
|
|
|
|
|
def modeloutput_to_formated_data(class_bbox, temp_res, out_fname="labels-test.txt"): |
|
"This function format the output data of the model to be standard Audacity format" |
|
|
|
|
|
dict_dir = "./ia_data" |
|
with open(os.path.join(dict_dir, "bird_dict.json"), "r") as f: |
|
birds_dict = json.load(f) |
|
|
|
birds_dict.update({"Non bird sound": 0, "Other": len(birds_dict) + 1}) |
|
reverse_dict = {id: bird_name for bird_name, id in birds_dict.items()} |
|
|
|
|
|
output = { |
|
reverse_dict[idx]: { |
|
key: value.cpu().numpy().tolist() |
|
for key, value in class_bbox[str(idx)].items() |
|
} |
|
for idx in range(len(reverse_dict)) |
|
if len(class_bbox[str(idx)]["bbox_coord"]) > 0 |
|
} |
|
|
|
|
|
|
|
|
|
table = [] |
|
table.append(["label", "score", "x1", "y1", "x2", "y2"]) |
|
|
|
for species_entry in output.items(): |
|
for i in range(len(species_entry[1]["bbox_coord"])): |
|
label = species_entry[0] |
|
bbox = species_entry[1]["bbox_coord"][i] |
|
score = species_entry[1]["scores"][i] |
|
row = [label, score] + bbox |
|
table.append(row) |
|
|
|
|
|
table[1:].sort(key=lambda entry: float(entry[2])) |
|
|
|
|
|
data = "" |
|
for row in table[1:]: |
|
label = row[0] |
|
score = row[1] |
|
if label == "Non bird sound": |
|
continue |
|
if label == "Other": |
|
continue |
|
x1, y1, x2, y2 = row[2:] |
|
|
|
y1 = y1 * 33.3 + 500 |
|
y2 = y2 * 33.3 + 500 |
|
x1 *= temp_res |
|
x2 *= temp_res |
|
|
|
entry = f"{x1}\t{x2}\t{label} {score:.2f}\n\\\t{y1}\t{y2}\n" |
|
data += entry |
|
with open(out_fname, "w") as f: |
|
f.write(data) |
|
return out_fname, data |
|
|
|
|
|
def end2endpipeline(wav_path): |
|
class_bbox, spectrogram, temp_res = bird_call_detection.process_wav( |
|
wav_path, min_score=0.5 |
|
) |
|
out_fname = Path(wav_path).stem + ".txt" |
|
out_f, data = modeloutput_to_formated_data( |
|
class_bbox, temp_res, out_fname=out_fname |
|
) |
|
|
|
return out_f, data |
|
|
|
|
|
demo = gradio.Interface( |
|
fn=end2endpipeline, |
|
inputs=gradio.Audio(sources="upload", type="filepath"), |
|
outputs=[gradio.File(), gradio.Textbox(interactive=False)], |
|
**interface_options, |
|
) |
|
|
|
launch_options = { |
|
|
|
|
|
"inline": True, |
|
|
|
} |
|
|
|
demo.launch(**launch_options) |
|
|