# Spaces:
# Sleeping
# Sleeping
import tensorflow as tf | |
import numpy as np | |
import librosa | |
import pickle | |
import io | |
# Location of the exported YAMNet SavedModel, resolved relative to the
# working directory the process is started from.
_YAMNET_SAVED_MODEL_DIR = 'yamnet_saved_model'

# Restore the model once, at import time, so every call to the embedding
# helper reuses the same loaded object.
yamnet_model = tf.saved_model.load(_YAMNET_SAVED_MODEL_DIR)
# Function to extract embeddings from audio using YAMNet
def extract_audio_embeddings(audio_binary):
    """Return the YAMNet embeddings for one audio clip.

    Args:
        audio_binary: The encoded audio. A bytes-like object (the original
            contract) is wrapped in ``io.BytesIO``. A filesystem path
            (``str`` or ``os.PathLike``) or an object with a ``read``
            method is passed to ``librosa.load`` unchanged.

    Returns:
        The model's ``embeddings`` output converted to nested Python lists
        via ``.numpy().tolist()`` (a list of lists).
    """
    # Paths and open streams are sources librosa.load can open by itself;
    # anything else is treated as raw encoded bytes, exactly as before.
    is_path = isinstance(audio_binary, str) or hasattr(audio_binary, "__fspath__")
    is_stream = hasattr(audio_binary, "read")
    if is_path or is_stream:
        source = audio_binary
    else:
        source = io.BytesIO(audio_binary)

    # Resample on load: YAMNet requires a sample rate of 16kHz.
    waveform, _sample_rate = librosa.load(source, sr=16000)

    # Convert audio to a float32 tensor before calling the model.
    waveform_tensor = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # The model returns three outputs; only the embeddings are used here.
    _scores, embeddings, _spectrogram = yamnet_model(waveform_tensor)

    return embeddings.numpy().tolist()
# Example usage
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"

    # extract_audio_embeddings takes the encoded audio bytes (its
    # ``audio_binary`` parameter), not a path, so read the file first.
    with open(image_audio_path, "rb") as audio_file:
        image_audio_bytes = audio_file.read()

    # Extract embeddings from image audio file
    image_audio_embeddings = extract_audio_embeddings(image_audio_bytes)
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)

# NOTE(review): the original indentation was lost, so it is unclear whether
# this print belonged inside the __main__ guard. It is kept at module level,
# which means it also runs on import -- confirm against the original file.
print("audio embedding model loaded succesfully")