abdullahmubeen10 committed on
Commit
b388843
·
verified ·
1 Parent(s): 38becfe

Update Demo.py

Browse files
Files changed (1) hide show
  1. Demo.py +204 -228
Demo.py CHANGED
@@ -1,228 +1,204 @@
1
- import streamlit as st
2
- import sparknlp
3
- import os
4
- import pandas as pd
5
- from sparknlp.base import *
6
- from sparknlp.annotator import *
7
- from pyspark.ml import Pipeline
8
- from sparknlp.pretrained import PretrainedPipeline
9
- from annotated_text import annotated_text
10
-
11
- # Page configuration
12
- st.set_page_config(layout="wide", initial_sidebar_state="auto")
13
-
14
- # CSS for styling
15
- st.markdown("""
16
- <style>
17
- .main-title {
18
- font-size: 36px;
19
- color: #4A90E2;
20
- font-weight: bold;
21
- text-align: center;
22
- }
23
- .section {
24
- background-color: #f9f9f9;
25
- padding: 10px;
26
- border-radius: 10px;
27
- margin-top: 10px;
28
- }
29
- .section p, .section ul {
30
- color: #666666;
31
- }
32
- </style>
33
- """, unsafe_allow_html=True)
34
-
35
- # Initialize Spark NLP
36
- @st.cache_resource
37
- def init_spark():
38
- return sparknlp.start()
39
-
40
- # Create the NER pipeline
41
- @st.cache_resource
42
- def create_pipeline(model, context_dict):
43
- documentAssembler = DocumentAssembler() \
44
- .setInputCol("text") \
45
- .setOutputCol("document")
46
-
47
- sentenceDetector = SentenceDetector() \
48
- .setInputCols(["document"]) \
49
- .setOutputCol("sentence")
50
-
51
- tokenizer = Tokenizer() \
52
- .setInputCols(["sentence"]) \
53
- .setOutputCol("token")
54
-
55
- zero_shot_ner = ZeroShotNerModel.pretrained(model, "en")\
56
- .setInputCols(["sentence", "token"])\
57
- .setOutputCol("zero_shot_ner")\
58
- .setEntityDefinitions(context_dict)
59
-
60
- ner_converter = NerConverter()\
61
- .setInputCols(["sentence", "token", "zero_shot_ner"])\
62
- .setOutputCol("ner_chunk")\
63
-
64
- pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, zero_shot_ner, ner_converter])
65
- return pipeline
66
-
67
- # Fit data using the pipeline
68
- def fit_data(pipeline, data):
69
- empty_df = spark.createDataFrame([['']]).toDF('text')
70
- pipeline_model = pipeline.fit(empty_df)
71
- model = LightPipeline(pipeline_model)
72
- result = model.fullAnnotate(data)
73
- return result
74
-
75
- # Annotate the text with NER results
76
- def annotate(data):
77
- document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
78
- annotated_words = []
79
- for chunk, label in zip(chunks, labels):
80
- parts = document.split(chunk, 1)
81
- if parts[0]:
82
- annotated_words.append(parts[0])
83
- annotated_words.append((chunk, label))
84
- document = parts[1]
85
- if document:
86
- annotated_words.append(document)
87
- annotated_text(*annotated_words)
88
-
89
- def df_to_dict(df):
90
- context_dict = {}
91
- for col in df.columns:
92
- values = df[col].dropna().tolist()
93
- if values:
94
- context_dict[col] = values
95
- return context_dict
96
-
97
- # Sidebar content
98
- model = st.sidebar.selectbox(
99
- "Choose the pretrained model",
100
- ["zero_shot_ner_roberta"],
101
- help="For more info about the models visit: https://sparknlp.org/models"
102
- )
103
-
104
- # Set up the page layout
105
- st.markdown('<div class="main-title">Zero-Shot Named Entity Recognition (NER)</div>', unsafe_allow_html=True)
106
- st.markdown('<div class="section"><p>Explore Zero-Shot Named Entity Recognition (NER)—a state-of-the-art technique that detects and classifies named entities in text without needing specific training on annotated datasets. With our interactive interface, you can modify the context by editing the DataFrame to define custom entity types and examples. Then, input your own text or select from predefined examples to see how the model identifies and categorizes entities in real time.</p></div>', unsafe_allow_html=True)
107
-
108
- # Reference notebook link in sidebar
109
- link = """
110
- <a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/1.6.ZeroShot_Clinical_NER.ipynb">
111
- <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
112
- </a>
113
- """
114
- st.sidebar.markdown('Reference notebook:')
115
- st.sidebar.markdown(link, unsafe_allow_html=True)
116
-
117
- # Set examples and create DataFrame
118
- data = {
119
- "PROBLEM": [
120
- "What is the disease?",
121
- "What are the symptoms of the condition?",
122
- "What is the patient's diagnosis?",
123
- "What kind of disease is he suffering from?",
124
- "What specific medical issue does she have?",
125
- "What is the main problem the patient is facing?",
126
- "What were the reasons for the patient's hospitalization?"
127
- ],
128
- "DRUG": [
129
- "Which medication was prescribed?",
130
- "What is the name of the drug used for treatment?",
131
- "Which drug is administered for this condition?",
132
- "What medication does he take daily?",
133
- "What drugs are used to manage his symptoms?",
134
- "Which medicine is recommended for this illness?",
135
- "What is the prescription for this medical condition?"
136
- ],
137
- "ADMISSION_DATE": [
138
- "When was the patient admitted to the hospital?",
139
- "What is the date of the patient's admission?",
140
- "On which date did the patient enter the clinic?",
141
- "When did the patient check into the hospital?",
142
- "What is the admission date for the patient?"
143
- ],
144
- "PATIENT_AGE": [
145
- "How old is the patient?",
146
- "What is the patient's age?",
147
- "At what age was the patient diagnosed?",
148
- "Can you tell me the age of the patient?",
149
- "What is the age of the person receiving treatment?"
150
- ],
151
- "SYMPTOM": [
152
- "What symptoms is the patient experiencing?",
153
- "What are the signs of the disease?",
154
- "Which symptoms did the patient report?",
155
- "What were the initial symptoms observed?",
156
- "What specific symptoms are present?"
157
- ],
158
- "TREATMENT": [
159
- "What treatment plan was recommended?",
160
- "Which therapies are being used?",
161
- "What is the current treatment protocol?",
162
- "What type of treatment is the patient undergoing?",
163
- "What are the options for treating this condition?"
164
- ],
165
- "DOCTOR": [
166
- "Who is the treating physician?",
167
- "Which doctor is handling the case?",
168
- "What is the name of the attending doctor?",
169
- "Who is the specialist for this illness?",
170
- "Can you provide the name of the doctor overseeing the treatment?"
171
- ]
172
- }
173
-
174
- # Pad shorter lists with empty strings
175
- max_length = max(len(v) for v in data.values())
176
- for key in data.keys():
177
- while len(data[key]) < max_length:
178
- data[key].append(None)
179
-
180
- # Create DataFrame and display
181
- df = pd.DataFrame(data)
182
- df.index += 1
183
- st.write("Context DataFrame (Click To Edit)")
184
- edited_df = st.data_editor(df)
185
-
186
- # Example sentences
187
- examples = [
188
- "Dr. Taylor prescribed Lisinopril to a 68-year-old patient with high blood pressure. The patient was admitted to the hospital on April 15, 2024, after experiencing severe hypertension symptoms.",
189
- "The 50-year-old male patient reported persistent back pain. The treatment plan includes physical therapy and a medication called Flexeril to alleviate the discomfort.",
190
- "The patient was admitted on June 12, 2024, with symptoms of severe abdominal pain. Dr. Kim diagnosed acute appendicitis and recommended immediate surgical intervention.",
191
- "A 25-year-old female patient with a history of asthma was treated with Albuterol. Dr. Patel noted that the patient's symptoms of shortness of breath improved significantly after starting the medication.",
192
- "The 72-year-old patient underwent surgery for cataracts on May 30, 2024. Dr. Martinez prescribed eye drops and scheduled a follow-up appointment for post-operative care.",
193
- "The patient, aged 40, presented with symptoms of chronic fatigue and joint pain. Dr. Nguyen recommended a combination of lifestyle changes and a new drug called Imunorix.",
194
- "Dr. Wilson observed that the 34-year-old patient’s condition, diagnosed as lupus, was managed with hydroxychloroquine and a tailored treatment regimen.",
195
- "The 56-year-old patient experienced symptoms of severe dehydration due to gastroenteritis. Dr. Rogers provided intravenous fluids and anti-nausea medication as treatment.",
196
- "On July 8, 2024, the patient was admitted with acute respiratory distress. Dr. Green prescribed a regimen including corticosteroids and bronchodilators to manage the symptoms.",
197
- "The 29-year-old patient, diagnosed with chronic migraines, was treated with a new medication called MigraRelief. The attending physician, Dr. Lewis, also recommended cognitive behavioral therapy."
198
- ]
199
-
200
- selected_text = st.selectbox("Select an example", examples)
201
- custom_input = st.text_input("Try it with your own Sentence!")
202
-
203
- text_to_analyze = custom_input if custom_input else selected_text
204
- context_dict = df_to_dict(edited_df)
205
-
206
- # Display example text
207
- st.subheader('Full Example Text')
208
- HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
209
- st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
210
-
211
- # Initialize Spark and create pipeline
212
- spark = init_spark()
213
- pipeline = create_pipeline(model, context_dict)
214
- output = fit_data(pipeline, text_to_analyze)
215
-
216
- # Display processed output
217
- st.subheader("Processed Output:")
218
- results = {
219
- 'Document': output[0]['document'][0].result,
220
- 'NER Chunk': [n.result for n in output[0]['ner_chunk']],
221
- "NER Label": [n.metadata['entity'] for n in output[0]['ner_chunk']]
222
- }
223
- annotate(results)
224
-
225
- with st.expander("View DataFrame"):
226
- df = pd.DataFrame({'NER Chunk': results['NER Chunk'], 'NER Label': results['NER Label']})
227
- df.index += 1
228
- st.dataframe(df)
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+ from sparknlp.base import *
6
+ from sparknlp.annotator import *
7
+ from pyspark.ml import Pipeline
8
+ from sparknlp.pretrained import PretrainedPipeline
9
+ from annotated_text import annotated_text
10
+
11
+ # Page configuration
12
+ st.set_page_config(layout="wide", initial_sidebar_state="auto")
13
+
14
+ # CSS for styling
15
+ st.markdown("""
16
+ <style>
17
+ .main-title {
18
+ font-size: 36px;
19
+ color: #4A90E2;
20
+ font-weight: bold;
21
+ text-align: center;
22
+ }
23
+ .section {
24
+ background-color: #f9f9f9;
25
+ padding: 10px;
26
+ border-radius: 10px;
27
+ margin-top: 10px;
28
+ }
29
+ .section p, .section ul {
30
+ color: #666666;
31
+ }
32
+ </style>
33
+ """, unsafe_allow_html=True)
34
+
35
+ # Initialize Spark NLP
36
+ @st.cache_resource
37
+ def init_spark():
38
+ return sparknlp.start()
39
+
40
+ # Create the NER pipeline
41
+ @st.cache_resource
42
+ def create_pipeline(model, context_dict):
43
+ documentAssembler = DocumentAssembler() \
44
+ .setInputCol("text") \
45
+ .setOutputCol("document")
46
+
47
+ sentenceDetector = SentenceDetector() \
48
+ .setInputCols(["document"]) \
49
+ .setOutputCol("sentence")
50
+
51
+ tokenizer = Tokenizer() \
52
+ .setInputCols(["sentence"]) \
53
+ .setOutputCol("token")
54
+
55
+ zero_shot_ner = ZeroShotNerModel.pretrained(model, "en")\
56
+ .setInputCols(["sentence", "token"])\
57
+ .setOutputCol("zero_shot_ner")\
58
+ .setEntityDefinitions(context_dict)
59
+
60
+ ner_converter = NerConverter()\
61
+ .setInputCols(["sentence", "token", "zero_shot_ner"])\
62
+ .setOutputCol("ner_chunk")\
63
+
64
+ pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, zero_shot_ner, ner_converter])
65
+ return pipeline
66
+
67
+ # Fit data using the pipeline
68
+ def fit_data(pipeline, data):
69
+ empty_df = spark.createDataFrame([['']]).toDF('text')
70
+ pipeline_model = pipeline.fit(empty_df)
71
+ model = LightPipeline(pipeline_model)
72
+ result = model.fullAnnotate(data)
73
+ return result
74
+
75
+ # Annotate the text with NER results
76
+ def annotate(data):
77
+ document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
78
+ annotated_words = []
79
+ for chunk, label in zip(chunks, labels):
80
+ parts = document.split(chunk, 1)
81
+ if parts[0]:
82
+ annotated_words.append(parts[0])
83
+ annotated_words.append((chunk, label))
84
+ document = parts[1]
85
+ if document:
86
+ annotated_words.append(document)
87
+ annotated_text(*annotated_words)
88
+
89
+ def df_to_dict(df):
90
+ context_dict = {}
91
+ for col in df.columns:
92
+ values = df[col].dropna().tolist()
93
+ if values:
94
+ context_dict[col] = values
95
+ return context_dict
96
+
97
+ # Sidebar content
98
+ model = st.sidebar.selectbox(
99
+ "Choose the pretrained model",
100
+ ["zero_shot_ner_roberta"],
101
+ help="For more info about the models visit: https://sparknlp.org/models"
102
+ )
103
+
104
+ # Set up the page layout
105
+ st.markdown('<div class="main-title">Zero-Shot Named Entity Recognition (NER)</div>', unsafe_allow_html=True)
106
+ st.markdown('<div class="section"><p>Explore Zero-Shot Named Entity Recognition (NER)—a state-of-the-art technique that detects and classifies named entities in text without needing specific training on annotated datasets. With our interactive interface, you can modify the context by editing the DataFrame to define custom entity types and examples. Then, input your own text or select from predefined examples to see how the model identifies and categorizes entities in real time.</p></div>', unsafe_allow_html=True)
107
+
108
+ # Reference notebook link in sidebar
109
+ link = """
110
+ <a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Healthcare/1.6.ZeroShot_Clinical_NER.ipynb">
111
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
112
+ </a>
113
+ """
114
+ st.sidebar.markdown('Reference notebook:')
115
+ st.sidebar.markdown(link, unsafe_allow_html=True)
116
+
117
+ # Set examples and create DataFrame
118
+ data = {
119
+ "PROBLEM": [
120
+ "What is the disease?",
121
+ "What are the symptoms of the condition?",
122
+ "What is the patient's diagnosis?",
123
+ "What kind of disease is he suffering from?",
124
+ "What specific medical issue does she have?",
125
+ "What is the main problem the patient is facing?",
126
+ "What were the reasons for the patient's hospitalization?"
127
+ ],
128
+ "DRUG": [
129
+ "Which medication was prescribed?",
130
+ "What is the name of the drug used for treatment?",
131
+ "Which drug is administered for this condition?",
132
+ "What medication does he take daily?",
133
+ "What drugs are used to manage his symptoms?",
134
+ "Which medicine is recommended for this illness?",
135
+ "What is the prescription for this medical condition?"
136
+ ],
137
+ "SYMPTOM": [
138
+ "What symptoms is the patient experiencing?",
139
+ "What are the signs of the disease?",
140
+ "Which symptoms did the patient report?",
141
+ "What were the initial symptoms observed?",
142
+ "What specific symptoms are present?"
143
+ ]
144
+ }
145
+
146
+ # Pad shorter lists with empty strings
147
+ max_length = max(len(v) for v in data.values())
148
+ for key in data.keys():
149
+ while len(data[key]) < max_length:
150
+ data[key].append(None)
151
+
152
+ # Create DataFrame
153
+ df = pd.DataFrame(data)
154
+ df.index += 1
155
+
156
+ # Add new key with all values as None
157
+ new_key = st.text_input("Add Lable:")
158
+ if new_key:
159
+ data = {new_key.upper(): [None] * max_length, **data}
160
+ df = pd.DataFrame(data)
161
+ df.index += 1
162
+ st.success(f"Added '{new_key}' to Context DataFrame")
163
+
164
+ st.write("Context DataFrame (Click To Edit)")
165
+ edited_df = st.data_editor(df)
166
+
167
+ # Example sentences
168
+ examples = [
169
+ "The patient is experiencing severe headache and nausea. The doctor has prescribed Ibuprofen to alleviate the symptoms. The patient has been complaining about these symptoms for the last three days.",
170
+ "The patient's main problem is chronic back pain. This issue has been affecting their daily activities significantly. The doctor recommended a series of physical therapy sessions to address the problem.",
171
+ "After the diagnosis of diabetes, the patient was given Metformin as a part of their treatment plan. The medication is intended to help manage blood sugar levels effectively.",
172
+ "The symptoms reported by the patient include persistent cough and shortness of breath. The doctor has advised some tests to identify the underlying cause of these symptoms.",
173
+ "The patient has been prescribed Prednisone to manage their severe inflammation. This medication is part of the treatment plan for their chronic condition."
174
+ ]
175
+
176
+ selected_text = st.selectbox("Select an example", examples)
177
+ custom_input = st.text_input("Try it with your own Sentence!")
178
+
179
+ text_to_analyze = custom_input if custom_input else selected_text
180
+ context_dict = df_to_dict(edited_df)
181
+
182
+ # Display example text
183
+ st.subheader('Full Example Text')
184
+ HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
185
+ st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
186
+
187
+ # Initialize Spark and create pipeline
188
+ spark = init_spark()
189
+ pipeline = create_pipeline(model, context_dict)
190
+ output = fit_data(pipeline, text_to_analyze)
191
+
192
+ # Display processed output
193
+ st.subheader("Processed Output:")
194
+ results = {
195
+ 'Document': output[0]['document'][0].result,
196
+ 'NER Chunk': [n.result for n in output[0]['ner_chunk']],
197
+ "NER Label": [n.metadata['entity'] for n in output[0]['ner_chunk']]
198
+ }
199
+ annotate(results)
200
+
201
+ with st.expander("View DataFrame"):
202
+ df = pd.DataFrame({'NER Chunk': results['NER Chunk'], 'NER Label': results['NER Label']})
203
+ df.index += 1
204
+ st.dataframe(df)