Upload 4 files
Browse files- .gitattributes +1 -0
- animals.index +3 -0
- hubert.py +230 -0
- noms_animaux.txt +0 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
animals.index filter=lfs diff=lfs merge=lfs -text
|
animals.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd8aec7b9a087ca44b5aee67f9493ff3758e82732da26c0d50a6a713a216ebe6
|
3 |
+
size 13609005
|
hubert.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import necessary libraries for the project
|
2 |
+
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
|
3 |
+
import torch
|
4 |
+
import os
|
5 |
+
import numpy as np
|
6 |
+
import faiss
|
7 |
+
import pandas as pd
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import time
|
10 |
+
import torchaudio
|
11 |
+
import gc
|
12 |
+
import math
|
13 |
+
import gradio as gr
|
14 |
+
import sys
|
15 |
+
|
16 |
+
# Load the pretrained HuBERT-base bundle from torchaudio; `model` is used to
# turn audio waveforms into feature vectors.
bundle = torchaudio.pipelines.HUBERT_BASE
model = bundle.get_model()

# Path to the animals.index file on your computer.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
index_path = "/Users/ariel/Downloads/animals.index"
# Read the FAISS index stored in animals.index.
index = faiss.read_index(index_path)

# Path to the noms_animaux.txt file. Position i of the resulting `names` list
# is used as the name of vector i of the index.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
chemin_noms_animaux = '/Users/ariel/Downloads/noms_animaux.txt'

# Read the file and convert it to a list with one cleaned name per line.
with open(chemin_noms_animaux, 'r') as fichier:
    # Lines may look like "'name'," (the format written back by add_in_index).
    # Quotes, commas and blanks are stripped as ONE character set: stripping
    # "'" first and "," second leaves the closing quote in place ("name'"),
    # and that stray quote would be duplicated on every rewrite of the file.
    names = [line.strip().strip("', \t") for line in fichier]
|
33 |
+
|
34 |
+
def bayes_theorem(df, n_top_vectors=50):
    """
    Calculate normalized posterior probabilities per category.

    The DataFrame is limited to its first n_top_vectors rows, the 'percentage'
    values are summed per 'names_normalized' category, weighted by a uniform
    prior, and normalized so that the returned probabilities sum to 1.

    Parameters:
    df (pd.DataFrame): DataFrame with a 'percentage' column (similarity scores)
        and a 'names_normalized' column (category labels).
    n_top_vectors (int): Number of leading rows to consider.

    Returns:
    dict: Category -> normalized posterior probability (float), in order of
        first appearance. Empty when there are no rows to consider.
    """
    # Limit the DataFrame to the top n vectors
    df_limited = df.head(n_top_vectors)
    if df_limited.empty:
        return {}

    # One pass over the rows; sort=False keeps categories in first-seen order.
    similarity_sums = df_limited.groupby('names_normalized', sort=False)['percentage'].sum()
    if similarity_sums.empty:
        return {}

    # Uniform prior over the categories actually present (instead of a fixed
    # 1/3). A uniform prior cancels out in the normalization below, so the
    # result only differs from a fixed prior in the zero-evidence fallback.
    prior = 1 / len(similarity_sums)
    weighted = similarity_sums * prior
    total = weighted.sum()

    # No similarity evidence at all: dividing would produce NaN, so fall back
    # to the prior itself.
    if total == 0:
        return {categorie: prior for categorie in similarity_sums.index}

    return {categorie: float(valeur / total) for categorie, valeur in weighted.items()}
|
63 |
+
|
64 |
+
def get_name_from_index(index):
    """
    Look up the animal name stored at a given position.

    The lookup is a plain list access on the module-level `names` list, so a
    negative value counts from the end of the list and an out-of-range value
    raises IndexError.

    Parameters:
    index (int): Position of the vector whose name is wanted.

    Returns:
    str: The entry of `names` at that position.
    """
    animal_name = names[index]
    return animal_name
|
75 |
+
|
76 |
+
def name_normalisation(name):
    """
    Map a raw animal name onto one of the known categories.

    The name is tested for the substrings 'dog', 'cat' and 'bird', in that
    order and case-sensitively; the first match decides the category.

    Parameters:
    name (str): Raw name of the animal.

    Returns:
    str: "Chien", "Chat" or "Oiseau", or "Animal non reconnu" when none of
        the substrings occurs in the name.
    """
    # Ordered (substring, label) pairs: earlier entries take precedence.
    keyword_labels = (
        ('dog', "Chien"),
        ('cat', "Chat"),
        ('bird', "Oiseau"),
    )
    for keyword, label in keyword_labels:
        if keyword in name:
            return label
    return "Animal non reconnu"
|
96 |
+
|
97 |
+
def exp_negative(x):
    """
    Evaluate the decaying exponential e^(-x).

    Parameters:
    x (float): Input value.

    Returns:
    float: math.exp(-x); equals 1.0 for x = 0 and shrinks towards 0 as x grows.
    """
    negated = -x
    return math.exp(negated)
|
110 |
+
|
111 |
+
def normalization(embeddings):
    """
    Scale vectors to unit Euclidean length.

    A 1-D input is treated as a single vector; any other input is normalized
    row by row (norm taken along axis 1). Vectors whose norm is zero are
    returned unchanged in both cases instead of being divided by zero.

    Parameters:
    embeddings (np.ndarray): Input vector (1-D) or matrix of row vectors (2-D).

    Returns:
    np.ndarray: Normalized vector or matrix of vectors.
    """
    # Check if embeddings is a single vector (1D) or a matrix (2D)
    if embeddings.ndim == 1:
        norm = np.linalg.norm(embeddings)
        if norm == 0:
            return embeddings
        return embeddings / norm

    # Normalize each row of a matrix
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # All-zero rows have norm 0; divide them by 1 so they stay zero instead of
    # turning into NaN, matching the guard of the 1-D branch. Editing a copy
    # in place keeps the dtype of the norms.
    safe_norms = norms.copy()
    safe_norms[safe_norms == 0] = 1
    return embeddings / safe_norms
|
135 |
+
|
136 |
+
def get_audio_embedding(audio_path):
    """
    Compute a single normalized embedding vector for an audio file.

    The file is loaded with torchaudio, resampled to the sample rate of the
    model bundle and passed through the model. The first element of the model
    output is collapsed over its first two dimensions, averaged into one
    vector, converted to float32 and normalized.

    Parameters:
    audio_path (str): Path to the audio file.

    Returns:
    np.ndarray: Normalized embedding with a leading axis of length 1, i.e. a
        2D array holding one row.
    """
    signal, source_rate = torchaudio.load(audio_path)
    signal = torchaudio.functional.resample(signal, source_rate, bundle.sample_rate)

    # Inference only: no gradients are needed.
    with torch.inference_mode():
        features, _ = model(signal)

    # Merge the first two dimensions, keep the last one, then average the rows.
    feature_dim = features.size(2)
    pooled = features.view(-1, feature_dim).mean(dim=0)

    vector = pooled.cpu().numpy().astype(np.float32)
    unit_vector = normalization(vector)
    # Add a leading axis so the result is a one-row matrix.
    return unit_vector[np.newaxis, :]
|
161 |
+
|
162 |
+
def searchinIndex(index, normal_embedding):
    """
    Query the FAISS index with one embedding and tabulate the results.

    The search asks for as many neighbours as the index holds (index.ntotal)
    and keeps the results of the first query row.

    Parameters:
    index (faiss.Index): The FAISS index to search.
    normal_embedding (np.ndarray): The normalized embedding to search for.

    Returns:
    pd.DataFrame: One row per returned vector, with a 'distance' column and an
        'index' column holding the position of the vector in the index.
    """
    distances, neighbours = index.search(normal_embedding, index.ntotal)
    return pd.DataFrame({'distance': distances[0], 'index': neighbours[0]})
|
178 |
+
|
179 |
+
def animal_classification(audio_path):
    """
    Classify the animal heard in an audio file.

    The audio is embedded, the module-level index is searched, every distance
    d is turned into a score exp(-d) * 100, each hit is mapped to its name and
    then to a normalized category, and bayes_theorem is applied to the first
    25 rows of the search results.

    Parameters:
    audio_path (str): Path to the audio file.

    Returns:
    str: One "category: probability" line per category, with the probability
        formatted as a percentage with two decimals.
    """
    embedding = get_audio_embedding(audio_path)
    matches = searchinIndex(index, embedding)

    # Distance -> similarity score in percent.
    similarity = matches['distance'].apply(exp_negative)
    matches['percentage'] = similarity * 100

    # Vector position -> stored name -> normalized category.
    matches['names'] = matches['index'].apply(get_name_from_index)
    matches['names_normalized'] = matches['names'].apply(name_normalisation)

    posteriors = bayes_theorem(matches, 25)

    report_lines = []
    for animal, percentage in posteriors.items():
        report_lines.append(f"{animal}: {percentage:.2%}")
    return '\n'.join(report_lines)
|
200 |
+
|
201 |
+
def add_in_index(audio_path):
    """
    Register a new audio file in the index and in the names file.

    The embedding of the audio file is added to the module-level FAISS index,
    the index is written back to index_path, the base name of the file is
    appended to the module-level names list, and the names file is rewritten
    from that list.

    Parameters:
    audio_path (str): Path to the audio file to be added.

    Returns:
    str: Confirmation message.
    """
    embedding = get_audio_embedding(audio_path)
    index.add(embedding)
    faiss.write_index(index, index_path)

    # The name stored for the new vector is the file name without directories.
    names.append(os.path.basename(audio_path))

    result = "L'ajout a bien effectué"
    with open(chemin_noms_animaux, 'w') as fichier:
        # One line per name, formatted as a Python list element.
        fichier.writelines(f"'{nom}',\n" for nom in names)
    return result
|
225 |
+
|
226 |
+
# Build the Gradio interface: an uploaded file goes to animal_classification
# and the text it returns is displayed.
interface = gr.Interface(
    fn=animal_classification,
    inputs="file",
    outputs="text",
)

# Start the interface.
interface.launch()
|
noms_animaux.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torchaudio
|
2 |
+
numpy
|
3 |
+
soundfile
|
4 |
+
statistics
|
5 |
+
pandas
|
6 |
+
pyarrow
|
7 |
+
torchaudio
|
8 |
+
matplotlib
|
9 |
+
torch
|
10 |
+
transformers
|
11 |
+
faiss-cpu
|