File size: 743 Bytes
3d2ca49
 
1d67a6e
 
 
 
3d2ca49
 
1d67a6e
77ce2b6
214ad84
3d2ca49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77ce2b6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
from sklearn.pipeline import Pipeline
from storage import Fetch
from cleaning import TextCleaner
from embedding import Embedder
from search import Search


def get_recs(id_list, save_recs: bool = False):
    """Run the recommendation pipeline on ``id_list`` and return its output.

    The pipeline chains four project-local stages in order: ``Fetch``,
    ``TextCleaner``, ``Embedder`` (configured with the ``allenai-specter``
    model name) and ``Search`` (pointed at the library under
    ``./data/libraries/APSP_50_allenai-specter``). Only ``transform`` is
    called; the pipeline is never fitted here.

    Args:
        id_list: Input handed unchanged to the first pipeline stage.
            NOTE(review): presumably a list of paper identifiers -- confirm
            against ``storage.Fetch``.
        save_recs: When true, also write the result to
            ``./output/recommendations.feather``.

    Returns:
        Whatever the final ``Search`` stage's ``transform`` returns. It is
        expected to be a pandas DataFrame, since ``to_feather`` is called
        on it when ``save_recs`` is set.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    path_to_library = "./data/libraries/APSP_50_allenai-specter"
    path_to_save_recs = "./output/"

    ## Create pipeline
    model = Pipeline(
        [
            ("fetch", Fetch()),
            ("clean", TextCleaner()),
            ("embed", Embedder(model_name="allenai-specter")),
            ("search", Search(path_to_library=path_to_library)),
        ]
    )

    recommendation_df = model.transform(id_list)

    if save_recs:
        # to_feather needs a file path, not a directory: the original passed
        # "./output/" directly, which cannot be opened for writing.
        output_dir = Path(path_to_save_recs)
        output_dir.mkdir(parents=True, exist_ok=True)
        recommendation_df.to_feather(output_dir / "recommendations.feather")

    return recommendation_df