Update README.md
Browse files
README.md
CHANGED
@@ -8,3 +8,21 @@ language:
|
|
8 |
pipeline_tag: sentence-similarity
|
9 |
---
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
pipeline_tag: sentence-similarity
|
9 |
---
|
10 |
|
11 |
+
```python
|
12 |
+
import numpy as np
|
13 |
+
from transformers import pipeline
|
14 |
+
from scipy.spatial.distance import cdist
|
15 |
+
|
16 |
+
# Feature-extraction pipeline backed by the retriever checkpoint.
retriever = pipeline(
    task='feature-extraction',
    model='cmarkea/bloomz-560m-retriever',
)


def infer(x):
    """Return one embedding per input text.

    For every pipeline output, keep the last entry of its first element
    (presumably the feature vector of the final token -- confirm against
    the pipeline's output layout).
    """
    embeddings = []
    for features in retriever(x):
        embeddings.append(features[0][-1])
    return embeddings
|
18 |
+
|
19 |
+
# Placeholder: replace with the passages to search over.
list_of_contexts = [...]
# infer() gives one 1-D embedding per text. Stack them into a 2-D
# (n_texts, embedding_dim) matrix, the input shape cdist requires.
# np.concatenate would instead join the 1-D vectors end to end into a
# single flat vector.
emb_contexts = np.stack(infer(list_of_contexts), axis=0)
# Placeholder: replace with the queries to answer.
list_of_queries = [...]
emb_queries = np.stack(infer(list_of_queries), axis=0)
|
23 |
+
|
24 |
+
# Pairwise Euclidean distances: one row per query, one column per context.
dist = cdist(emb_queries, emb_contexts, metric='euclidean')


def top_k(x):
    """Return, for each query, its x closest contexts, nearest first."""
    # argsort is ascending, so the smallest distances come first.
    ranked = dist.argsort(axis=-1)
    nearest = []
    for row in ranked[:, :x]:
        nearest.append([list_of_contexts[idx] for idx in row])
    return nearest


# The 5 nearest contexts for each query.
top_contexts = top_k(5)
|
28 |
+
```
|