File size: 720 Bytes
53a9a92
f49bb16
 
 
 
 
 
 
53a9a92
3c45d75
f49bb16
 
 
 
 
 
 
 
 
 
 
b30bec4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from gensim.models import KeyedVectors
from typing import List, Dict


class PreTrainedPipeline:
    """Nearest-neighbour word lookup backed by pre-trained word2vec vectors.

    On construction the vectors file is fetched with ``hf_hub_download`` and
    loaded as gensim ``KeyedVectors``; calling the instance with a word
    returns that word's most similar vocabulary entries as one text blob.
    """

    # Hub location of the vectors file loaded in ``__init__``.
    REPO_ID = "lang-uk/word2vec-uk"
    FILENAME = "ubercorpus.cased.tokenized.300d"
    # Neighbour count used when the caller does not request a specific one.
    DEFAULT_TOPN = 30

    def __init__(self, path=""):
        """
        Download the vectors file and load it into ``self.model``.

        Args:
            path (:obj:`str`):
                Not read by this implementation; the vectors always come
                from ``REPO_ID`` / ``FILENAME``.
        """
        from huggingface_hub import hf_hub_download

        vectors_file = hf_hub_download(repo_id=self.REPO_ID, filename=self.FILENAME)
        # binary=False: the file is parsed as the text word2vec format.
        self.model = KeyedVectors.load_word2vec_format(vectors_file, binary=False)

    def __call__(self, inputs: str, *, topn: int = DEFAULT_TOPN) -> List[Dict]:
        """
        Args:
            inputs (:obj:`str`):
                the word to look up; surrounding whitespace is stripped
            topn (:obj:`int`, keyword-only, defaults to 30):
                how many neighbours to request from the model
        Return:
            A :obj:`list` holding a single :obj:`dict` whose
            ``"generated_text"`` value is the neighbours joined by
            ``", \\n\\n"``, in the order the model returned them.
        Raises:
            Whatever ``self.model.most_similar`` raises for the given key
            (gensim raises :obj:`KeyError` for out-of-vocabulary words);
            nothing is caught here.
        """
        query = inputs.strip()
        neighbours = self.model.most_similar(query, topn=topn)
        # Each entry is a (word, similarity) pair; only the word is reported.
        joined = ", \n\n".join(word for word, _similarity in neighbours)
        return [{"generated_text": joined}]