from typing import Any, Dict

import requests
import torch
from transformers import ClapModel, ClapProcessor
class EndpointHandler:
    """Inference-endpoint handler that embeds text queries with a CLAP model.

    Loads ``laion/larger_clap_general`` once at startup and serves text
    embeddings on each call.
    """

    def __init__(self, path=""):
        # `path` is part of the hosted-endpoint handler contract; the model
        # is pulled from the hub rather than from `path`.
        model_name = "laion/larger_clap_general"
        self.model = ClapModel.from_pretrained(model_name)
        self.processor = ClapProcessor.from_pretrained(model_name)

    def __call__(self, data: Dict[str, Any]):
        """Compute a text embedding for the query in ``data``.

        data args:
            inputs (:obj:`str`): the text query to embed.
        Return:
            A 1-D :obj:`numpy.ndarray` with the text embedding, or ``None``
            when no ``inputs`` key is present (matches the original
            fall-through behavior).
        """
        # Guard clause: nothing to embed.
        if 'inputs' not in data:
            return None

        query = data['inputs']
        text_inputs = self.processor(text=query, return_tensors="pt")
        # Inference only: no_grad avoids building an autograd graph and
        # keeps per-request memory flat.
        with torch.no_grad():
            text_embed = self.model.get_text_features(**text_inputs)[0]
        return text_embed.detach().numpy()