Spaces:
Build error
Build error
| import spacy | |
# Shared spaCy pipeline, built once at import time and reused by
# extract_entities() for every article.
nlp = spacy.load("en_core_web_md")
# Append the "entityfishing" component to the pipeline.
# NOTE(review): presumably this is the component that fills the kb_qid,
# nerd_score and url_wikidata extension attributes read further down, and is
# registered by a separately installed package -- confirm it is available.
nlp.add_pipe("entityfishing")
def extract_entities(article, *, min_score=0.5):
    """Find Wikidata references for the entities mentioned in an article.

    Runs the module-level ``nlp`` pipeline over ``article`` and keeps the
    ORG, PERSON and GPE entities that carry a Wikidata QID, dropping
    repeated mentions so that each QID is returned at most once.

    Args:
        article: Text to analyse.
        min_score: Lowest ``nerd_score`` an entity may have and still be
            kept. Defaults to 0.5.

    Returns:
        A list of the retained entity spans, in document order.
    """
    wanted_labels = {"ORG", "PERSON", "GPE"}
    ents = []
    # Sets instead of lists: these are only ever used for membership tests.
    seen_entities = set()
    seen_surnames = set()
    seen_qids = set()

    for ent in nlp(article).ents:
        qid = ent._.kb_qid
        if qid is None or ent.label_ not in wanted_labels or ent.text in seen_entities:
            continue

        score = ent._.nerd_score
        # NOTE(review): assumes the score can be unset (None) like kb_qid;
        # an entity without a score is skipped instead of raising TypeError
        # on the comparison -- confirm against the linker component.
        if score is None or score < min_score:
            continue

        words = ent.text.split()
        if len(words) == 1:
            # Single name: skip it when it matches the last word of a
            # multi-word PERSON mention seen earlier.
            if ent.text in seen_surnames:
                continue
        elif ent.label_ == "PERSON":
            # Multipart name: remember its last word so later single-word
            # mentions of it are skipped.
            seen_surnames.add(words[-1])

        # The text is recorded before the QID check, so a mention whose QID
        # was already returned still suppresses later identical mentions.
        seen_entities.add(ent.text)
        if qid in seen_qids:
            continue
        seen_qids.add(qid)
        ents.append(ent)
    return ents
if __name__ == "__main__":
    # Prompt for an article, then print one row per linked entity:
    # surface text, entity label and Wikidata URL.
    found = extract_entities(input("article: "))
    print()
    print("ENTITIES:")
    for entity in found:
        row = (entity.text, "\t", entity.label_, "\t", entity._.url_wikidata)
        print(*row)