yuji96 committed on
Commit
f1941ef
1 Parent(s): 4772173

add generic pipeline

Browse files
Files changed (3) hide show
  1. README.md +1 -0
  2. pipeline.py +34 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -12,6 +12,7 @@ language:
12
  metrics:
13
  - spearmanr
14
  pipeline_tag: sentence-similarity
 
15
  ---
16
 
17
  # sup-simcse-ja-base
 
12
  metrics:
13
  - spearmanr
14
  pipeline_tag: sentence-similarity
15
+ library_name: generic
16
  ---
17
 
18
  # sup-simcse-ja-base
pipeline.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copied from https://github.com/huggingface/api-inference-community/blob/main/docker_images/sentence_transformers/app/pipelines/sentence_similarity.py
2
+
3
+ import os
4
+ from typing import Dict, List, Union
5
+
6
+ from sentence_transformers import SentenceTransformer, util
7
+
8
+
9
class PreTrainedPipeline:
    """Sentence-similarity pipeline backed by a ``SentenceTransformer`` model.

    Instances are callable: given one source sentence and a list of candidate
    sentences, they return the cosine similarity of the source to each
    candidate.
    """

    def __init__(
        self,
        model_id: str,
    ):
        """Load the sentence-transformers model identified by ``model_id``.

        Args:
            model_id: Model identifier handed unchanged to
                ``SentenceTransformer``.
        """
        # The auth token comes from the HF_API_TOKEN environment variable;
        # ``os.getenv`` yields ``None`` when the variable is not set.
        self.model = SentenceTransformer(
            model_id, use_auth_token=os.getenv("HF_API_TOKEN")
        )

    def __call__(self, inputs: Dict[str, Union[str, List[str]]]) -> List[float]:
        """Compute cosine similarities between a source sentence and others.

        Args:
            inputs (:obj:`dict`):
                a dictionary containing two keys, 'source_sentence' mapping
                to the sentence that will be compared against all the others,
                and 'sentences', mapping to a list of strings to which the
                source will be compared.
        Return:
            A :obj:`list` of floats: Cosine similarity between
            `source_sentence` and each sentence from `sentences`. An empty
            `sentences` list yields an empty list.
        Raises:
            KeyError: if 'source_sentence' or 'sentences' is missing from
                `inputs`.
        """
        # Read both keys up front so a malformed payload fails with KeyError
        # before any model work is done.
        source_sentence = inputs["source_sentence"]
        sentences = inputs["sentences"]

        # Nothing to compare against: return early instead of pushing an
        # empty batch through the encoder and the similarity computation.
        if not sentences:
            return []

        embeddings1 = self.model.encode(source_sentence, convert_to_tensor=True)
        embeddings2 = self.model.encode(sentences, convert_to_tensor=True)
        # The similarity result is indexed at [0]: the single row belonging
        # to the one source sentence.
        similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
        return similarities
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sentence-transformers
2
+ fugashi[unidic-lite]