# clap1 / handler.py
# History: "went back to text_embedding" (commit cb288da)
# import io
from typing import Dict, List, Any
import requests
# import librosa
from transformers import ClapModel, ClapProcessor
# import gc
# import base64
class EndpointHandler:
    """Hugging Face Inference Endpoints handler that returns CLAP text embeddings.

    Loads ``laion/larger_clap_general`` once at startup and serves text
    embedding requests via ``__call__``.
    """

    def __init__(self, path=""):
        # `path` is part of the Inference Endpoints handler contract but is
        # unused here: the model is always pulled from the Hub by name.
        model_name = "laion/larger_clap_general"
        self.model = ClapModel.from_pretrained(model_name)
        self.processor = ClapProcessor.from_pretrained(model_name)

    def __call__(self, data: Dict[str, Any]):
        """Embed the query text found under ``data['inputs']``.

        Args:
            data: request payload; must contain an ``'inputs'`` key whose
                value is the query text (str, or list of str).

        Returns:
            A 1-D numpy array with the CLAP text embedding of the query
            (first item of the batch); it is serialized by the endpoint
            runtime.

        Raises:
            ValueError: if ``'inputs'`` is missing from the payload.
        """
        if "inputs" not in data:
            # Previously this fell off the end and returned None silently,
            # which surfaced as an opaque empty response. Fail loudly instead.
            raise ValueError("Request payload must contain an 'inputs' key with the query text.")
        query = data["inputs"]
        text_inputs = self.processor(text=query, return_tensors="pt")
        text_embed = self.model.get_text_features(**text_inputs)[0]
        return text_embed.detach().numpy()