# import io
from typing import Dict, List, Any

import requests
# import librosa
from transformers import ClapModel, ClapProcessor

# import gc
# import base64


class EndpointHandler:
    def __init__(self, path=""):
        model_name = "laion/larger_clap_general"
        self.model = ClapModel.from_pretrained(model_name)
        self.processor = ClapProcessor.from_pretrained(model_name)

    def __call__(self, data: Dict[str, Any]):
        """
        Args:
            data: dict with an `inputs` key holding the query text (:obj:`str`).
        Returns:
            The CLAP text embedding as a numpy array, which the endpoint
            serializes and returns.
        """
        # print(type(data))
        if "inputs" in data:
            query = data["inputs"]
            # Tokenize the query and project it into the CLAP embedding space.
            text_inputs = self.processor(text=query, return_tensors="pt")
            text_embed = self.model.get_text_features(**text_inputs)[0]
            return text_embed.detach().numpy()
        # return requests.get('https://api.ipify.org?format=json').text

        # if 'audio' in data:
        #     # Load the audio data into librosa
        #     audio_buffer = io.BytesIO(base64.b64decode(data['audio']))
        #     y, sr = librosa.load(audio_buffer, sr=48000)
        #     inputs = self.processor(audios=y, sampling_rate=sr, return_tensors="pt")
        #     embedding = self.model.get_audio_features(**inputs)[0]
        #     gc.collect()
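
# A minimal local smoke test: a sketch only, assuming the model weights can be
# downloaded in the current environment. The Inference Endpoints runtime
# normally imports and calls EndpointHandler itself, so this guard is never
# reached in production; the example query string is purely illustrative.
if __name__ == "__main__":
    handler = EndpointHandler()
    embedding = handler({"inputs": "dog barking in the rain"})
    print(embedding.shape)  # 1-D numpy array: the CLAP text embedding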