femiari committed on
Commit
7560c23
1 Parent(s): 02374d4

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. animals.index +3 -0
  3. hubert.py +230 -0
  4. noms_animaux.txt +0 -0
  5. requirements.txt +11 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ animals.index filter=lfs diff=lfs merge=lfs -text
animals.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd8aec7b9a087ca44b5aee67f9493ff3758e82732da26c0d50a6a713a216ebe6
3
+ size 13609005
hubert.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries for the project
2
+ from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
3
+ import torch
4
+ import os
5
+ import numpy as np
6
+ import faiss
7
+ import pandas as pd
8
+ import matplotlib.pyplot as plt
9
+ import time
10
+ import torchaudio
11
+ import gc
12
+ import math
13
+ import gradio as gr
14
+ import sys
15
+
16
# Load the pretrained HuBERT bundle and model used for audio vectorization.
bundle = torchaudio.pipelines.HUBERT_BASE
model = bundle.get_model()

# Path to the animals.index file.  The historical default is kept, but it can
# be overridden with the ANIMALS_INDEX_PATH environment variable so the script
# is usable on machines other than the one it was written on.
index_path = os.environ.get("ANIMALS_INDEX_PATH", "/Users/ariel/Downloads/animals.index")
# Read the FAISS index from disk.
index = faiss.read_index(index_path)

# Path to the noms_animaux.txt file (overridable with NOMS_ANIMAUX_PATH).
chemin_noms_animaux = os.environ.get("NOMS_ANIMAUX_PATH", '/Users/ariel/Downloads/noms_animaux.txt')

# Build the list that maps a vector position in the index to a name: entry i of
# `names` is the name used for vector i.
with open(chemin_noms_animaux, 'r') as fichier:
    # Lines written by add_in_index look like `'name',`.  Quotes and commas are
    # stripped together as one character set: stripping them in two separate
    # passes would leave the closing quote in place, because the comma shields
    # it during the first pass.
    # Blank lines are kept (as empty names) so positions stay aligned.
    names = [line.strip().strip("',").strip() for line in fichier]
33
+
34
def bayes_theorem(df, n_top_vectors=50):
    """
    Calculate normalized posterior probabilities per category.

    Only the first `n_top_vectors` rows of `df` are considered. The similarity
    percentages are summed per category and divided by the overall sum.
    A uniform prior multiplies every category by the same constant, which
    cancels out in the normalization, so it is not applied explicitly.

    Parameters:
    df (pd.DataFrame): DataFrame with a 'percentage' column (similarities) and
        a 'names_normalized' column (categories).
    n_top_vectors (int): Number of leading rows to consider.

    Returns:
    dict: Category -> normalized probability, in order of first appearance.
        Empty when no rows are considered. When every similarity is zero the
        probabilities fall back to a uniform split instead of NaN (0 / 0).
    """
    # Limit the DataFrame to the top n vectors
    df_limited = df.head(n_top_vectors)
    # One pass per-category sum; sort=False keeps first-appearance order
    sommes = df_limited.groupby('names_normalized', sort=False)['percentage'].sum()
    if len(sommes) == 0:
        return {}
    total_proba = float(sommes.sum())
    if total_proba == 0:
        # No similarity mass at all: nothing distinguishes the categories
        uniforme = 1.0 / len(sommes)
        return {categorie: uniforme for categorie in sommes.index}
    return {categorie: float(somme) / total_proba for categorie, somme in sommes.items()}
63
+
64
def get_name_from_index(index):
    """
    Look up the name stored at a given position of the module-level `names` list.

    Parameters:
    index (int): Position of the vector whose name is wanted.

    Returns:
    str: The entry of `names` at that position.
    """
    animal_name = names[index]
    return animal_name
75
+
76
def name_normalisation(name):
    """
    Map a raw animal name to one of the known categories.

    The name is matched against the keywords 'dog', 'cat' and 'bird', in that
    order, and the first keyword found decides the category. Matching is
    case-insensitive so that names such as 'Dog_01.wav' are recognised too.

    Parameters:
    name (str): Name of the animal (typically a file name).

    Returns:
    str: "Chien", "Chat", "Oiseau", or "Animal non reconnu" when no keyword
        is found.
    """
    lowered = name.lower()
    # Order matters: the first matching keyword wins
    for keyword, label in (('dog', "Chien"), ('cat', "Chat"), ('bird', "Oiseau")):
        if keyword in lowered:
            return label
    return "Animal non reconnu"
96
+
97
def exp_negative(x):
    """
    Return e raised to the power of minus `x`.

    Parameters:
    x (float): Input value.

    Returns:
    float: exp(-x).
    """
    negated = -x
    return math.exp(negated)
110
+
111
def normalization(embeddings):
    """
    Scale vectors to unit Euclidean norm.

    A 1D input is treated as a single vector; any other input is normalized
    row by row along axis 1. Vectors whose norm is zero are returned unchanged
    in both cases, so no NaN values are produced by a division by zero.

    Parameters:
    embeddings (np.ndarray): Input vector (1D) or matrix of row vectors (2D).

    Returns:
    np.ndarray: Normalized vector or matrix of vectors.
    """
    if embeddings.ndim == 1:
        # Single vector
        norm = np.linalg.norm(embeddings)
        if norm == 0:
            return embeddings
        return embeddings / norm
    # One norm per row, kept as a column so it broadcasts over the row
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    # Dividing an all-zero row by 1 leaves it at zero instead of NaN
    safe_norms = np.where(norms == 0, 1, norms)
    return embeddings / safe_norms
135
+
136
def get_audio_embedding(audio_path):
    """
    Compute a normalized embedding for an audio file.

    The file is loaded with torchaudio, resampled to the sample rate of the
    module-level `bundle`, and passed through the module-level `model`. The
    resulting features are averaged into a single vector, which is normalized
    and returned with an extra leading axis.

    Parameters:
    audio_path (str): Path to the audio file.

    Returns:
    np.ndarray: float32 array holding the normalized embedding as its only row.
    """
    waveform, source_rate = torchaudio.load(audio_path)
    waveform = torchaudio.functional.resample(waveform, source_rate, bundle.sample_rate)

    # Inference only: no gradients are needed
    with torch.inference_mode():
        emission, _ = model(waveform)

    # Collapse every leading dimension so each row is one feature vector,
    # then average the rows into a single vector
    frames = emission.view(-1, emission.size(2))
    pooled = frames.mean(dim=0)

    vector = normalization(pooled.cpu().numpy().astype(np.float32))
    # Add a leading axis so the result is a one-row matrix
    return vector[np.newaxis, :]
161
+
162
def searchinIndex(index, normal_embedding, k=None):
    """
    Search the index for the vectors closest to the given embedding.

    Parameters:
    index (faiss.Index): The FAISS index to search.
    normal_embedding (np.ndarray): The normalized query embedding; only the
        results for its first row are returned.
    k (int, optional): Number of neighbours to retrieve. Defaults to every
        vector in the index (the historical behaviour). Values larger than the
        index size are clamped to it.

    Returns:
    pd.DataFrame: Columns 'distance' and 'index', one row per retrieved vector,
        in the order returned by the index. Empty when the index holds no
        vectors or `k` is not positive.
    """
    total = index.ntotal
    if k is None or k > total:
        k = total
    if k <= 0:
        # Nothing to search for: return an empty, correctly shaped result
        return pd.DataFrame({
            'distance': np.array([], dtype=np.float32),
            'index': np.array([], dtype=np.int64),
        })
    D, I = index.search(normal_embedding, k)
    return pd.DataFrame({'distance': D[0], 'index': I[0]})
178
+
179
def animal_classification(audio_path):
    """
    Classify the animal heard in an audio file.

    The audio is embedded, the module-level index is searched, each distance is
    turned into a percentage with exp_negative, each hit is given a name and a
    normalized category, and bayes_theorem is applied to the first 25 hits.

    Parameters:
    audio_path (str): Path to the audio file.

    Returns:
    str: One "category: probability" line per category, joined by newlines.
    """
    embedding = get_audio_embedding(audio_path)
    matches = searchinIndex(index, embedding)

    # Distance -> similarity percentage
    matches['percentage'] = matches['distance'].apply(exp_negative) * 100
    # Vector position -> raw name -> normalized category
    matches['names'] = matches['index'].apply(get_name_from_index)
    matches['names_normalized'] = matches['names'].apply(name_normalisation)

    posteriors = bayes_theorem(matches, 25)

    lines = []
    for animal, percentage in posteriors.items():
        lines.append(f"{animal}: {percentage:.2%}")
    return '\n'.join(lines)
200
+
201
def add_in_index(audio_path):
    """
    Add a new audio to the index for better classification.

    The audio embedding is added to the module-level FAISS index and the file's
    base name is appended to the module-level `names` list. Both in-memory
    structures are updated before anything is written to disk, so they stay
    aligned with each other even if persisting fails. The index file and the
    names file are then rewritten.

    Parameters:
    audio_path (str): Path to the audio file to be added.

    Returns:
    str: Confirmation message indicating the addition was successful.
    """
    new_audio = get_audio_embedding(audio_path)

    # Update the index and the name list together: position i in `names` must
    # always describe vector i of the index
    index.add(new_audio)
    names.append(os.path.basename(audio_path))

    # Persist the index, then the names
    faiss.write_index(index, index_path)
    with open(chemin_noms_animaux, 'w') as fichier:
        # Write each name to the file, formatted as a Python list element
        fichier.writelines(f"'{nom}',\n" for nom in names)

    return "L'ajout a bien effectué"
225
+
226
# Build the Gradio interface: a file input fed to animal_classification,
# with the returned text shown as output
interface = gr.Interface(
    fn=animal_classification,
    inputs="file",
    outputs="text",
)

# Start serving the interface
interface.launch()
noms_animaux.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torchaudio
2
+ numpy
3
+ soundfile
4
+ statistics
5
+ pandas
6
+ pyarrow
7
+ torchaudio
8
+ matplotlib
9
+ torch
10
+ transformers
11
+ faiss-cpu