# sionic-ai-v2 / model_api.py
# Commit f54cf3c (sionic): Upload model class and mteb evaluation codes
from typing import Dict, List, Optional, Union
import numpy as np
import requests
from mteb import DRESModel
from tqdm import tqdm
class SionicEmbeddingModel(DRESModel):
    """Client that obtains sentence embeddings from a remote HTTP endpoint.

    Texts are sent in batches as ``{'inputs': [...]}`` via HTTP POST and the
    vectors are read from the ``'embedding'`` field of the JSON response.
    The class exposes ``encode``, ``encode_queries`` and ``encode_corpus``.
    """

    def __init__(
        self,
        url: str,
        instruction: Optional[str] = None,
        batch_size: int = 128,
        dimension: int = 2048,
        **kwargs,
    ) -> None:
        """Store the endpoint configuration.

        Args:
            url: Address of the embedding endpoint that receives POST requests.
            instruction: Optional text prepended to every query in
                ``encode_queries``. ``None`` means "no prefix".
            batch_size: Number of texts sent per request.
            dimension: Width of each embedding vector; responses are reshaped
                to ``(batch, dimension)``.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        # NOTE(review): the base-class initializer is not invoked, exactly as
        # in the original code -- confirm DRESModel needs no setup here.
        self.url = url
        self.instruction = instruction
        self.batch_size = batch_size
        self.dimension = dimension

    def get_embeddings(self, queries: List[str]) -> np.ndarray:
        """Embed one batch of texts with a single HTTP request.

        Args:
            queries: Texts to embed in this request.

        Returns:
            A ``float32`` array of shape ``(len(queries), dimension)``.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            ValueError: If the payload cannot be reshaped to the expected
                ``(len(queries), dimension)`` shape.
        """
        # Nothing to embed: skip the network round trip entirely.
        if len(queries) == 0:
            return np.empty((0, self.dimension), dtype=np.float32)

        response = requests.post(self.url, json={'inputs': queries})
        # Surface server-side failures as HTTP errors instead of letting them
        # show up later as a confusing KeyError / JSON decoding error.
        response.raise_for_status()
        vectors = np.asarray(response.json()['embedding'], dtype=np.float32)
        return vectors.reshape(len(queries), self.dimension)

    def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
        """Embed search queries, prefixing each with the instruction if set.

        Args:
            queries: Query strings.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``float32`` array of shape ``(len(queries), dimension)``.
        """
        # With instruction=None the f-string used to yield "None<query>";
        # treat a missing instruction as an empty prefix instead.
        prefix = self.instruction or ''
        return self.encode([f'{prefix}{query}' for query in queries])

    @staticmethod
    def _doc_to_text(doc: Union[Dict[str, str], str]) -> str:
        """Render one corpus entry as the text that gets embedded."""
        if isinstance(doc, dict):
            # A missing or None title contributes nothing to the text.
            title = doc.get('title') or ''
            return f"{title} {doc['text']}".strip()
        return doc

    def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kwargs) -> np.ndarray:
        """Embed corpus documents.

        Dict documents are rendered as ``"<title> <text>"`` (stripped); plain
        strings are embedded unchanged.

        Args:
            corpus: Documents, either dicts with a ``'text'`` key and an
                optional ``'title'`` key, or plain strings.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``float32`` array of shape ``(len(corpus), dimension)``.
        """
        # Deciding per document (instead of peeking at corpus[0]) also makes
        # an empty corpus safe.
        sentences: List[str] = [self._doc_to_text(doc) for doc in corpus]
        return self.encode(sentences)

    def encode(self, sentences: List[str], **kwargs) -> np.ndarray:
        """Embed texts in batches of ``batch_size`` and stack the results.

        Args:
            sentences: Texts to embed.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``float32`` array of shape ``(len(sentences), dimension)``;
            an empty input yields an array of shape ``(0, dimension)``.
        """
        # np.concatenate rejects an empty list of arrays, so handle the empty
        # input up front. len() is used rather than truthiness so that
        # array-like inputs do not trigger an ambiguous-truth-value error.
        if len(sentences) == 0:
            return np.empty((0, self.dimension), dtype=np.float32)

        batches = [
            self.get_embeddings(sentences[start:start + self.batch_size])
            for start in tqdm(range(0, len(sentences), self.batch_size), desc='encode')
        ]
        return np.concatenate(batches, axis=0)