import utils
import spacy
from maddog import Extractor
import constant

# Module-level resources, loaded once at import time and shared by every
# call to popularity().
nlp = spacy.load("en_core_web_sm")
ruleExtractor = Extractor()
kb = utils.load_acronym_kb('../input/acronym_kb.json')


def popularity(sentence):
    """Return ``(acronym, expansion)`` pairs for the acronyms in *sentence*.

    The sentence is tokenized with the module-level spaCy pipeline ``nlp``;
    tokens whose text is empty or whitespace-only are dropped. The remaining
    token texts are passed to ``ruleExtractor.extract`` together with
    ``constant.RULES``.

    For every acronym key in the extractor's result:

    * if the first element of its value is a non-empty string, that element
      is used as the expansion;
    * otherwise ``utils.get_candidate(kb, acronym, can_num=1)`` is queried
      and the first element of its result is used instead.

    Args:
        sentence: Raw text to analyse.

    Returns:
        A list of ``(acronym, expansion)`` tuples, in the iteration order of
        the extractor's result.
    """
    tokens = [t.text for t in nlp(sentence) if t.text.strip()]
    rulebased_pairs = ruleExtractor.extract(tokens, constant.RULES)

    results = []
    for acronym in rulebased_pairs.keys():
        rule_expansion = rulebased_pairs[acronym][0]
        if rule_expansion != '':
            # The rule-based extractor produced an expansion; use it as is.
            results.append((acronym, rule_expansion))
        else:
            # No rule-based expansion: fall back to the knowledge base.
            # NOTE(review): assumes get_candidate returns a non-empty
            # sequence when can_num=1 -- confirm against utils.
            pred = utils.get_candidate(kb, acronym, can_num=1)
            results.append((acronym, pred[0]))
    return results


if __name__ == '__main__':
    sentence = (
        "NCBI This new genome assembly and the annotation are tagged as a "
        "RefSeq genome by NCBI and thus provide substantially enhanced "
        "genomic resources for future research involving S. scovelli."
    )
    # popularity() is the entry point defined in this module; the name
    # run_eval is neither defined nor imported here and would raise
    # NameError.
    results = popularity(sentence=sentence)
    print(results)