File size: 987 Bytes
fa0a93c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import utils
import spacy
from maddog import Extractor
import constant
# Module-level resources, created once at import time and reused by
# popularity() on every call.
# spaCy English pipeline; popularity() calls it on the raw sentence and reads
# each token's .text.
nlp = spacy.load("en_core_web_sm")
# Extractor from the maddog package; popularity() calls its extract() method
# with the token list and constant.RULES.
ruleExtractor = Extractor()
# Acronym knowledge base read from a relative path.
# NOTE(review): presumably resolved against the current working directory, so
# the script must be launched from the expected folder -- confirm in
# utils.load_acronym_kb.
kb = utils.load_acronym_kb('../input/acronym_kb.json')


def popularity(sentence):
    """Pair every acronym detected in *sentence* with an expansion.

    The sentence is tokenized with the module-level spaCy pipeline (tokens
    whose text is empty or whitespace-only are dropped) and the token list is
    passed to the rule extractor together with ``constant.RULES``.  For each
    key of the mapping the extractor returns, the first element of its value
    is used when it is not the empty string; otherwise the first element
    returned by ``utils.get_candidate(kb, key, can_num=1)`` is used instead.

    NOTE(review): the keys are presumably the acronyms and the first value
    element the rule-extracted long form -- confirm against maddog.Extractor.

    Args:
        sentence: Raw text to analyse.

    Returns:
        A list of ``(acronym, expansion)`` tuples, in the iteration order of
        the extractor's mapping.
    """
    words = [tok.text for tok in nlp(sentence) if tok.text.strip()]
    extracted = ruleExtractor.extract(words, constant.RULES)

    def resolve(short_form):
        # Prefer the expansion found by the rules in the sentence itself.
        expansion = extracted[short_form][0]
        if expansion == '':
            # No in-sentence expansion: take the single knowledge-base candidate.
            expansion = utils.get_candidate(kb, short_form, can_num=1)[0]
        return expansion

    return [(short_form, resolve(short_form)) for short_form in extracted]


if __name__ == '__main__':
    # Small manual demo: run the acronym expansion on one example sentence
    # and print the resulting (acronym, expansion) pairs.
    sentence = (
        "NCBI This new genome assembly and the annotation are tagged as a RefSeq genome by NCBI and thus provide substantially enhanced genomic resources for future research involving S. scovelli."
    )
    # popularity() is the entry point this module defines; the previously
    # referenced run_eval() does not exist here and raised NameError.
    results = popularity(sentence=sentence)
    print(results)