bloomz-3b-retriever / README.md
Cyrile's picture
Update README.md
8bc5035
metadata
license: bigscience-bloom-rail-1.0
datasets:
  - squad
language:
  - fr
  - en
pipeline_tag: sentence-similarity
import numpy as np
from transformers import pipeline
from scipy.spatial.distance import cdist

# Feature-extraction pipeline built on the retriever checkpoint.
retriever = pipeline('feature-extraction', 'cmarkea/bloomz-3b-retriever')


def infer(x):
    """Return one vector per pipeline output: the last entry of its first element.

    NOTE(review): presumably each pipeline output is nested as
    (1, n_tokens, hidden_size), so ``output[0][-1]`` is the hidden state of
    the last token -- confirm against the feature-extraction pipeline docs.
    ``x`` is expected to be a list of texts, as in the calls below.
    """
    return [output[0][-1] for output in retriever(x)]

# Placeholder: replace with the passages to search over.
list_of_contexts = [...]
# Stack the per-text vectors as rows so the result is a 2-D matrix with one
# row per text, which is what cdist requires. np.concatenate along axis 0
# would instead join 1-D vectors end to end into a single flat array.
emb_contexts = np.vstack(infer(list_of_contexts))
# Placeholder: replace with the queries to answer.
list_of_queries = [...]
emb_queries = np.vstack(infer(list_of_queries))

# Pairwise Euclidean (L2) distances: row i holds the distances from query i
# to every context.
dist = cdist(emb_queries, emb_contexts, 'euclidean')


def top_k(x):
    """Return, for each query, its ``x`` closest contexts, nearest first."""
    ranked = dist.argsort(axis=-1)[:, :x]
    return [[list_of_contexts[idx] for idx in row] for row in ranked]


# The five nearest contexts for each query.
top_contexts = top_k(5)