File size: 2,555 Bytes
6e9b9b9
70cda52
 
 
 
 
 
 
fee7a69
70cda52
3e8d135
 
 
 
 
 
 
 
 
70cda52
3e8d135
70cda52
3e8d135
 
 
70cda52
3e8d135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70cda52
3e8d135
 
 
 
 
 
 
2c80115
3e8d135
 
70cda52
3e8d135
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

# Name of the pretrained spaCy pipeline package this app relies on.
_MODEL_NAME = "en_core_web_md"

# Module-level pipeline object; `load` below reads it as a global.
nlp = spacy.load(_MODEL_NAME)

def _parse_terms(raw):
    """Split the raw entity-reference field into a list of clean terms.

    If the text contains a comma, commas and newlines are treated as the
    separators, so multi-word terms such as ``"New York"`` survive intact.
    Otherwise the text is split on whitespace, which is what this app did
    historically. Empty pieces are dropped.
    """
    text = (raw or "").strip()
    if "," in text:
        pieces = text.replace("\n", ",").split(",")
    else:
        pieces = text.split()
    return [piece.strip() for piece in pieces if piece.strip()]


def load(txt1, txt2, txt3, txt4):
    """Run stock NER on one text and user-defined phrase NER on another.

    Args:
        txt1: Text analysed with the unmodified ``nlp`` pipeline.
        txt2: Entity reference terms, comma-separated (or whitespace-separated
            when no comma is present).
        txt3: Label assigned to every span that matches one of the terms.
        txt4: New text in which the terms are looked up.

    Returns:
        A 3-tuple ``(entities, entities2, lab_counts)``:
        ``entities`` is a list of ``(text, label)`` pairs found in ``txt1`` by
        the stock pipeline; ``entities2`` is a list of ``(text, label)`` pairs
        for the phrase matches found in ``txt4``; ``lab_counts`` is a
        ``Counter`` of the labels in ``entities2``.
    """
    from collections import Counter

    # Baseline entities from the stock pipeline.
    baseline_doc = nlp((txt1 or "").strip())
    entities = [(ent.text, ent.label_) for ent in baseline_doc.ents]

    user_named = (txt3 or "").strip()

    # The matcher is built fresh per call and applied directly to the Doc.
    # Registering it as a pipeline component on the shared `nlp` object would
    # pin the first request's terms/label for every later request and would
    # also contaminate the baseline entities above.
    # Patterns only need tokenisation, so `make_doc` avoids running the
    # tagger/parser/NER over every term.
    patterns = [nlp.make_doc(term) for term in _parse_terms(txt2)]
    matcher = PhraseMatcher(nlp.vocab)
    if patterns:
        matcher.add(user_named, patterns)

    apply_doc = nlp((txt4 or "").strip())
    if patterns:
        spans = [
            Span(apply_doc, start, end, label=user_named)
            for _match_id, start, end in matcher(apply_doc)
        ]
    else:
        spans = []

    # Assigning overlapping spans to `doc.ents` raises, so keep only a
    # non-overlapping subset. As before, the custom matches replace whatever
    # the statistical NER found in this text.
    apply_doc.ents = spacy.util.filter_spans(spans)
    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]

    lab_counts = Counter(label for _text, label in entities2)

    return (entities, entities2, lab_counts)
    
    
    
# Blurb passed to the interface as its description.
description = "Use this space to produce and test your own customized NER"


# The four text inputs are passed positionally to load(txt1, txt2, txt3, txt4);
# the three text outputs receive the three items of the tuple it returns.
iface = gr.Interface(
    title="Customized Named Entity Recognition",
    description=description,
    fn=load,
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="COVID",
            label="Enter entity label",
        ),
        gr.inputs.Textbox(
            lines=10,
            default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
            label="Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="str", label="Entities recognized before"),
        # Label typo fixed: "Entites" -> "Entities".
        gr.outputs.Textbox(type="str", label="Entities recognized after - on new text"),
        gr.outputs.Textbox(type="str", label="Count of entities captured for new label"),
    ],
    theme="dark",
)
iface.launch()