Spaces:

femiari
/

Animalvox

Sleeping

App Files Files Community

femiari commited on Jun 18

Commit

7560c23

•

1 Parent(s): 02374d4

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
animals.index +3 -0
hubert.py +230 -0
noms_animaux.txt +0 -0
requirements.txt +11 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+animals.index filter=lfs diff=lfs merge=lfs -text

animals.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd8aec7b9a087ca44b5aee67f9493ff3758e82732da26c0d50a6a713a216ebe6
+size 13609005

hubert.py ADDED Viewed

	@@ -0,0 +1,230 @@

+# Import necessary libraries for the project
+from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
+import torch
+import os
+import numpy as np
+import faiss
+import pandas as pd
+import matplotlib.pyplot as plt
+import time
+import torchaudio
+import gc
+import math
+import gradio as gr
+import sys
+# Load the model used for audio vectorization
+bundle = torchaudio.pipelines.HUBERT_BASE
+model = bundle.get_model()
+# Variable containing the path to the animals.index file on your computer
+index_path = "/Users/ariel/Downloads/animals.index"
+# Read the animals.index file
+index = faiss.read_index(index_path)
+# Variable containing the path to the noms_animaux.txt file
+chemin_noms_animaux = '/Users/ariel/Downloads/noms_animaux.txt'
+# Process the noms_animaux.txt file to link vectors in animals.index to names in noms_animaux.txt
+# Read the file content and convert it to a list
+with open(chemin_noms_animaux, 'r') as fichier:
+    # Use a list comprehension to process each line
+    names = [line.strip().strip("'").strip(",").strip() for line in fichier.readlines()]
+def bayes_theorem(df, n_top_vectors=50):
+    """
+    Calculate posterior probabilities using Bayes' theorem.
+    This function limits the DataFrame to the top n vectors, calculates the sum of similarities
+    for each category, and computes the posterior probabilities normalized by the total probability.
+    Parameters:
+    df (pd.DataFrame): DataFrame containing similarity percentages and categories.
+    n_top_vectors (int): Number of top vectors to consider.
+    Returns:
+    dict: Normalized posterior probabilities for each category.
+    """
+    # Limit the DataFrame to the top n vectors
+    df_limited = df.head(n_top_vectors)
+    # Get unique categories and initialize the posterior probabilities dictionary
+    categories = df_limited['names_normalized'].unique()
+    probas_a_posteriori = {categorie: 0 for categorie in categories}
+    # Calculate uniform prior probabilities
+    probas_a_priori = 1/3
+    # Sum similarities for each category limited to the top n vectors
+    for categorie in categories:
+        somme_similarites = df_limited[df_limited['names_normalized'] == categorie]['percentage'].sum()
+        probas_a_posteriori[categorie] = somme_similarites * probas_a_priori
+    # Normalize the posterior probabilities
+    total_proba = sum(probas_a_posteriori.values())
+    probas_a_posteriori_normalisees = {categorie: (proba / total_proba) for categorie, proba in probas_a_posteriori.items()}
+    return probas_a_posteriori_normalisees
+def get_name_from_index(index):
+    """
+    Get the animal name corresponding to a given vector index.
+    Parameters:
+    index (int): Index of the vector.
+    Returns:
+    str: Name of the animal.
+    """
+    return names[index]
+def name_normalisation(name):
+    """
+    Normalize animal names.
+    This function normalizes the names of animals by categorizing them into common types.
+    Parameters:
+    name (str): Name of the animal.
+    Returns:
+    str: Normalized animal name.
+    """
+    if 'dog' in name:
+        return "Chien"
+    elif 'cat' in name:
+        return "Chat"
+    elif 'bird' in name:
+        return "Oiseau"
+    else:
+        return "Animal non reconnu"
+def exp_negative(x):
+    """
+    Define the negative exponential function.
+    This function applies the negative exponential transformation to a given value.
+    Parameters:
+    x (float): Input value.
+    Returns:
+    float: Transformed value.
+    """
+    return math.exp(-x)
+def normalization(embeddings):
+    """
+    Normalize vectors.
+    This function normalizes either a single vector (1D) or a matrix of vectors (2D).
+    If the input is 1D, it normalizes the single vector; if 2D, it normalizes each row.
+    Parameters:
+    embeddings (np.ndarray): Input vector or matrix of vectors.
+    Returns:
+    np.ndarray: Normalized vector or matrix of vectors.
+    """
+    # Check if embeddings is a single vector (1D) or a matrix (2D)
+    if embeddings.ndim == 1:
+        # Normalize a single vector
+        norm = np.linalg.norm(embeddings)
+        if norm == 0:
+            return embeddings
+        return embeddings / norm
+    else:
+        # Normalize each row of a matrix
+        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
+        return embeddings / norms
+def get_audio_embedding(audio_path):
+    """
+    Get the audio embedding for a given audio file.
+    This function loads the audio file, processes it to obtain the emission,
+    flattens and averages the features, normalizes them, and returns the normalized 2D array.
+    Parameters:
+    audio_path (str): Path to the audio file.
+    Returns:
+    np.ndarray: Normalized 2D array of audio embedding.
+    """
+    waveform1, sample_rate1 = torchaudio.load(audio_path)
+    waveform1 = torchaudio.functional.resample(waveform1, sample_rate1, bundle.sample_rate)
+    with torch.inference_mode():
+        emission1, _ = model(waveform1)
+    # Flatten the first two dimensions and keep the third
+    flattened_features1 = emission1.view(-1, emission1.size(2))
+    mean_features1 = flattened_features1.mean(dim=0)
+    mean1_array = mean_features1.cpu().numpy().astype(np.float32)
+    mean1_normal = normalization(mean1_array)
+    mean1_normal_2d = mean1_normal[np.newaxis, :]
+    return mean1_normal_2d
+def searchinIndex(index, normal_embedding):
+    """
+    Search for the closest audio vectors in the animals.index file.
+    This function searches the FAISS index for the most similar vectors to the given input embedding.
+    Parameters:
+    index (faiss.Index): The FAISS index to search.
+    normal_embedding (np.ndarray): The normalized embedding to search for.
+    Returns:
+    pd.DataFrame: DataFrame containing distances and indices of the closest vectors.
+    """
+    D, I = index.search(normal_embedding, index.ntotal)
+    r = pd.DataFrame({'distance': D[0], 'index': I[0]})
+    return r
+def animal_classification(audio_path):
+    """
+    Classify the species of animals from an audio file.
+    This function extracts the audio embedding, searches the index, calculates similarity percentages,
+    normalizes the names, and applies Bayes' theorem to determine the most likely animal.
+    Parameters:
+    audio_path (str): Path to the audio file.
+    Returns:
+    str: Formatted result with animal classifications and their probabilities.
+    """
+    query_audio = get_audio_embedding(audio_path)  # Get the audio embedding
+    results = searchinIndex(index, query_audio)  # Search the index
+    results['percentage'] = results['distance'].apply(exp_negative) * 100  # Calculate the percentage
+    results['names'] = results['index'].apply(get_name_from_index)  # Get names from the index
+    results['names_normalized'] = results['names'].apply(name_normalisation)  # Normalize the names
+    resultat = bayes_theorem(results, 25)
+    formatted_result = '\n'.join([f"{animal}: {percentage:.2%}" for animal, percentage in resultat.items()])
+    return formatted_result
+def add_in_index(audio_path):
+    """
+    Add a new audio to the index for better classification.
+    This function extracts the audio embedding from a new audio file, adds it to the FAISS index,
+    updates the index file, and appends the name to the names list.
+    Parameters:
+    audio_path (str): Path to the audio file to be added.
+    Returns:
+    str: Confirmation message indicating the addition was successful.
+    """
+    new_audio = get_audio_embedding(audio_path)
+    index.add(new_audio)
+    faiss.write_index(index, index_path)
+    file_name = os.path.basename(audio_path)
+    names.append(file_name)
+    result = "L'ajout a bien effectué"
+    with open(chemin_noms_animaux, 'w') as fichier:
+        # Write each name to the file, formatted as a Python list element
+        for nom in names:
+            fichier.write(f"'{nom}',\n")
+    return result
+# Create the graphical interface
+interface = gr.Interface(fn=animal_classification, inputs="file", outputs="text")
+# Launch the interface
+interface.launch()

noms_animaux.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+torchaudio
+numpy
+soundfile
+statistics
+pandas
+pyarrow
+torchaudio
+matplotlib
+torch
+transformers
+faiss-cpu