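# NOTE: the lines below appear to be file-viewer page residue rather than module code;
# they are kept as comments so that the module remains importable.
# sionic-ai-v1 / model_api.py
# sionic's picture
# Upload a code to evaluate MTEB
# b401e88
# raw
# history blame
# No virus
# 1.42 kB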
from typing import Dict, List, Optional, Union
import numpy as np
import requests
from mteb import DRESModel
from tqdm import tqdm
class ModelV1(DRESModel):
    """``DRESModel`` implementation that obtains embeddings from an HTTP endpoint.

    Texts are POSTed in batches to ``url`` as ``{'inputs': [...]}`` and the
    ``'embedding'`` field of the JSON reply is converted to a float32 array
    with one row per input text.
    """

    # Width of each embedding vector; every reply is reshaped to
    # (number_of_inputs, EMBEDDING_DIM).
    EMBEDDING_DIM = 2048

    def __init__(
        self,
        url: str,
        instruction: Optional[str] = None,
        batch_size: int = 128,
        **kwargs,
    ) -> None:
        """Store the endpoint configuration.

        Args:
            url: Address the embedding requests are POSTed to.
            instruction: Optional prefix prepended to every query in
                ``encode_queries``; corpus documents are never prefixed.
            batch_size: Maximum number of texts sent in a single request.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        self.url = url
        self.instruction = instruction
        self.batch_size = batch_size

    def get_embeddings(self, queries: List[str]) -> np.ndarray:
        """Embed one batch of texts with a single POST request.

        Args:
            queries: Texts to embed.

        Returns:
            float32 array of shape ``(len(queries), EMBEDDING_DIM)``.

        Raises:
            requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        """
        if len(queries) == 0:
            # Nothing to embed: skip the network round trip entirely.
            return np.empty((0, self.EMBEDDING_DIM), dtype=np.float32)

        response = requests.post(self.url, json={'inputs': queries})
        # Fail with a clear HTTP error instead of an opaque KeyError / JSON
        # decoding error when the service rejects the request.
        response.raise_for_status()
        return np.asarray(
            response.json()['embedding'],
            dtype=np.float32,
        ).reshape(len(queries), self.EMBEDDING_DIM)

    def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
        """Embed search queries, prefixing each with ``instruction`` when one is set.

        Args:
            queries: Query strings.
            **kwargs: Accepted and ignored.

        Returns:
            float32 array with one row per query.
        """
        # Without this guard a missing instruction would be formatted as the
        # literal text "None" in front of every query.
        prefix = '' if self.instruction is None else self.instruction
        return self.encode([f'{prefix}{query}' for query in queries])

    def encode_corpus(self, corpus: List[Union[Dict[str, str], str]], **kwargs) -> np.ndarray:
        """Embed corpus documents.

        Dict documents are rendered as ``"<title> <text>"`` with surrounding
        whitespace stripped (``title`` is optional, ``text`` is required);
        plain strings are embedded as they are. The type of the first element
        decides how the whole corpus is treated.

        Args:
            corpus: Documents, either all dicts or all strings.
            **kwargs: Accepted and ignored.

        Returns:
            float32 array with one row per document.
        """
        if len(corpus) == 0:
            # An empty corpus has no first element to inspect.
            return self.encode([])

        sentences: List[str] = (
            [f"{doc.get('title', '')} {doc['text']}".strip() for doc in corpus]
            if isinstance(corpus[0], dict)
            else corpus
        )
        return self.encode(sentences)

    def encode(self, sentences: List[str], **kwargs) -> np.ndarray:
        """Embed texts in chunks of ``batch_size`` and stack the results.

        Args:
            sentences: Texts to embed.
            **kwargs: Accepted and ignored.

        Returns:
            float32 array of shape ``(len(sentences), EMBEDDING_DIM)``.
        """
        if len(sentences) == 0:
            # np.concatenate raises ValueError when given no arrays.
            return np.empty((0, self.EMBEDDING_DIM), dtype=np.float32)

        return np.concatenate(
            [
                self.get_embeddings(sentences[idx:idx + self.batch_size])
                for idx in tqdm(range(0, len(sentences), self.batch_size), desc='encode')
            ],
            axis=0,
        )