# hprasath's picture
# Upload 9 files
# bbcc5b2 verified
import tensorflow as tf
import numpy as np
import librosa
import pickle
import io
# Load the YAMNet model once, at import time, from the SavedModel directory
# 'yamnet_saved_model' (a relative path, so it is resolved against the current
# working directory). extract_audio_embeddings calls this object directly.
yamnet_model = tf.saved_model.load('yamnet_saved_model')
def extract_audio_embeddings(audio_binary):
    """Return YAMNet embeddings for an encoded audio clip.

    Args:
        audio_binary: Raw bytes of an audio file. They are wrapped in an
            in-memory buffer and decoded with librosa, so this must be the
            file's contents, not a filesystem path.

    Returns:
        The model's ``embeddings`` output converted to nested Python lists.
    """
    # YAMNet requires a 16 kHz sample rate, so librosa resamples while decoding.
    waveform, _ = librosa.load(io.BytesIO(audio_binary), sr=16000)

    # The model is fed a float32 tensor of the decoded samples.
    waveform_tensor = tf.convert_to_tensor(waveform, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the embeddings
    # are needed here.
    _, embeddings, _ = yamnet_model(waveform_tensor)
    return embeddings.numpy().tolist()
# Example usage: embed a single audio file and print the result.
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"
    # extract_audio_embeddings wraps its argument in io.BytesIO, so it must be
    # given the file's raw bytes; passing the path string raises TypeError.
    with open(image_audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    # Extract embeddings from image audio file
    image_audio_embeddings = extract_audio_embeddings(audio_bytes)
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)

# NOTE(review): kept at module level on the assumption that this is an
# import-time notice that the model above finished loading -- confirm against
# the original indentation.
print("audio embedding model loaded succesfully")