tdubon commited on
Commit
70cda52
·
1 Parent(s): 40cb58a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span

# Shared pretrained pipeline used by every gradio handler below.
# NOTE(review): requires `python -m spacy download en_core_web_md` at build time.
nlp = spacy.load("en_core_web_md")
9
+
10
#Text 1
def process_text(text1):
    """Run the pretrained pipeline over *text1* and report its entities.

    Returns a list of (entity text, label) pairs followed by
    (label, explanation) pairs for each entity found.
    """
    # Bug fix: the original `return [ for ent in ...]` was a syntax error,
    # called load() (which returned None) and read an undefined `doc1`.
    doc1 = nlp(str(text1).strip())
    mentions = [(ent.text, ent.label_) for ent in doc1.ents]
    # Surface what each label means, mirroring the original spacy.explain prints.
    meanings = [(ent.label_, spacy.explain(ent.label_)) for ent in doc1.ents]
    return mentions + meanings
19
+
20
def load(text):
    """Normalise *text* and return the spaCy Doc from the shared pipeline."""
    user_input = str(text.strip())
    # Bug fix: the original built the Doc and implicitly returned None.
    return nlp(user_input)
23
+
24
#Text 2
def entities(text2):
    """Gradio handler: build PhraseMatcher patterns from whitespace-separated terms."""
    # Bug fix: the original printed an undefined global `patterns` and its
    # helper returned None; build the patterns here and return them as text.
    pattern_list = str(text2).strip().split()
    patterns = list(nlp.pipe(pattern_list))
    return "patterns: " + str(patterns)
28
+
29
def named_ents(text):
    """Split *text* on whitespace and return one Doc pattern per term."""
    pattern_list = []
    for term in text.strip().split():
        pattern_list.append(term)
    # Bug fix: the original built the patterns and then discarded them
    # (implicit None return).
    return list(nlp.pipe(pattern_list))
35
+
36
#Text 3
def run(text3):
    """Gradio handler: register the custom matcher component, report the pipeline."""
    pipe(text3)
    # Bug fix: the original `return [doc print(nlp.pipe_names)]` was a syntax
    # error and referenced an undefined `doc`; report the active pipeline.
    return str(nlp.pipe_names)
42
+
43
def pipe(text):
    """Add a custom NER component matching the user-supplied label to `nlp`.

    *text* is the label name entered in the gradio text box.
    NOTE(review): relies on a module-level `patterns` list (built by
    `named_ents`/`entities`) being in scope — confirm against callers.
    """
    matcher = PhraseMatcher(nlp.vocab)
    #Create label for pattern
    user_named = str(text.strip()) #gradio text box here to enter pattern label
    matcher.add(user_named, patterns)

    # Bug fix: the original registered the component as "covid_component" but
    # called add_pipe with user_named + "component", which raises at runtime.
    # Register and add under the SAME name.
    component_name = user_named + "_component"

    # Define the custom component
    @Language.component(component_name)
    def covid_component_function(doc):
        # Apply the matcher to the doc
        matches = matcher(doc)
        # Create a Span for each match and assign the user-supplied label
        spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
        # Overwrite the doc.ents with the matched spans
        doc.ents = spans
        return doc

    # Add the component to the pipeline after the "ner" component
    nlp.add_pipe(component_name, after="ner")
    print(nlp.pipe_names)
61
+
62
#Text 4
def test(text4):
    """Gradio handler: run the (extended) pipeline on a new sentence."""
    # Bug fix: the original return list was a syntax error and referenced
    # `apply_doc`/`labels`, which are local to new_sample; surface its result.
    return str(new_sample(text4))
69
+
70
+
71
def new_sample(text):
    """Apply the pipeline to *text*, print its entities, return label counts."""
    # Bug fix: the original had an unbalanced paren: `str(text).strip())`.
    user_doc = str(text).strip()
    apply_doc = nlp(user_doc)
    print([(ent.text, ent.label_) for ent in apply_doc.ents])
    #Count total mentions of label COVID in the 3rd document
    from collections import Counter
    labels = [ent.label_ for ent in apply_doc.ents]
    # Bug fix: the original computed the Counter and discarded it.
    return Counter(labels)
# ---------------------------------------------------------------------------
# Scratch notes from the original exploratory script (kept for reference):
#
# user_input = input(str(""))  # gradio text box here to enter sample text
# doc1 = nlp(user_input)
#
# Print list of entities captured by the pretrained model:
# for ent in doc1.ents:
#     print(ent.text, ent.label_)
#
# Inspect labels and their meaning:
# for ent in doc1.ents:
#     print(ent.label_, spacy.explain(ent.label_))
#
# Use PhraseMatcher to find all references of interest.
# Define the different references to Covid:
# user_entries = input(str(""))  # gradio text box here to enter sample terms
# pattern_list = []
# for i in user_entries.strip().split():
#     pattern_list.append(i)
#
# patterns = list(nlp.pipe(pattern_list))
# print("patterns:", patterns)
#
# Instantiate PhraseMatcher:
# matcher = PhraseMatcher(nlp.vocab)
#
# Create label for pattern:
# user_named = input(str("").strip())  # gradio text box here to enter pattern label
# matcher.add(user_named, patterns)
#
# Define the custom component:
# @Language.component("covid_component")
# def covid_component_function(doc):
#     # Apply the matcher to the doc
#     matches = matcher(doc)
#     # Create a Span for each match and assign the label
#     spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
#     # Overwrite the doc.ents with the matched spans
#     doc.ents = spans
#     return doc
#
# Add the component to the pipeline after the "ner" component:
# nlp.add_pipe((user_named + "component"), after="ner")
# print(nlp.pipe_names)
#
# Verify that the model now detects all specified mentions of Covid on another text:
# user_doc = input(str("").strip())
# apply_doc = nlp(user_doc)
# print([(ent.text, ent.label_) for ent in apply_doc.ents])
#
# Count total mentions of label COVID in the 3rd document:
# from collections import Counter
# labels = [ent.label_ for ent in apply_doc.ents]
# Counter(labels)
# ---------------------------------------------------------------------------
136
# Bug fix: the original passed four separate (fn, inputs) groups and stray `)`
# tokens into a single gr.Interface call, which is a syntax/API error.
# Expose each handler on its own tab instead.
iface = gr.TabbedInterface(
    [
        gr.Interface(
            process_text,
            gr.Textbox(lines=10, value="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
            "text",
        ),
        gr.Interface(
            entities,
            gr.Textbox(lines=3, value="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2", label="Enter entity references"),
            "text",
        ),
        gr.Interface(
            run,
            gr.Textbox(lines=1, value="COVID", label="Enter entity label"),
            "text",
        ),
        gr.Interface(
            test,
            gr.Textbox(lines=1, value="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.", label="Test: Enter new sentence containing named entity"),
            "text",
        ),
    ],
)
iface.launch()