|
import utils |
|
import spacy |
|
from maddog import Extractor |
|
import constant |
|
|
|
# Shared, module-level resources: created once at import time and reused by
# every call to popularity().

# spaCy pipeline; popularity() uses it only to split a sentence into tokens.
nlp = spacy.load("en_core_web_sm")

# Rule-based extractor, applied to the token list together with constant.RULES.
ruleExtractor = Extractor()

# Acronym knowledge base, consulted when the rules give an empty result.
# NOTE(review): the path is relative, so it is presumably resolved against the
# current working directory -- confirm in utils.load_acronym_kb.
kb = utils.load_acronym_kb('../input/acronym_kb.json')
|
|
|
|
|
def popularity(sentence):
    """Pair every acronym found in ``sentence`` with a single expansion.

    The sentence is tokenized with the module-level spaCy pipeline, and the
    tokens are handed to the rule-based extractor together with
    ``constant.RULES``. For each acronym key the extractor returns:

    * if the first element of its value is not the empty string, that
      element is used;
    * otherwise the first item returned by
      ``utils.get_candidate(kb, acronym, can_num=1)`` is used.

    NOTE(review): judging by the function name, the knowledge-base candidate
    is presumably the most popular expansion -- confirm in
    ``utils.get_candidate``.

    Args:
        sentence: Raw text to analyse.

    Returns:
        A list of ``(acronym, expansion)`` tuples, in the iteration order of
        the extractor's result.
    """
    # Drop tokens that are empty once surrounding whitespace is stripped.
    words = [tok.text for tok in nlp(sentence) if tok.text.strip()]
    extracted = ruleExtractor.extract(words, constant.RULES)

    pairs = []
    for short_form, match in extracted.items():
        long_form = match[0]
        if long_form == '':
            # The rules produced no expansion: fall back to the knowledge base.
            long_form = utils.get_candidate(kb, short_form, can_num=1)[0]
        pairs.append((short_form, long_form))
    return pairs
|
|
|
|
|
if __name__ == '__main__':
    # Small demo: resolve the acronyms of one sample sentence and print the
    # resulting (acronym, expansion) pairs.
    sentence = (
        "NCBI This new genome assembly and the annotation are tagged as a RefSeq genome by NCBI and thus provide substantially enhanced genomic resources for future research involving S. scovelli."
    )
    # The original called run_eval(), which is neither defined nor imported in
    # this module and therefore raised NameError; popularity() is the resolver
    # this file actually provides.
    results = popularity(sentence)
    print(results)