AIEcosystem committed on
Commit
d1553d4
·
verified ·
1 Parent(s): 8f31f8b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +16 -59
src/streamlit_app.py CHANGED
@@ -1,8 +1,5 @@
1
  import os
2
  os.environ['HF_HOME'] = '/tmp'
3
-
4
-
5
-
6
  import time
7
  import streamlit as st
8
  import pandas as pd
@@ -16,11 +13,8 @@ from gliner import GLiNER
16
  from comet_ml import Experiment
17
  import hashlib
18
 
19
-
20
-
21
  # --- Page Configuration and UI Elements ---
22
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
23
-
24
  st.markdown(
25
  """
26
  <style>
@@ -69,20 +63,11 @@ st.markdown(
69
  }
70
  </style>
71
  """,
72
- unsafe_allow_html=True
73
- )
74
-
75
  st.subheader("HR.ai", divider="green")
76
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
77
-
78
  expander = st.expander("**Important notes**")
79
- expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
80
- Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
81
- **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
82
- **Usage Limits:** You can request results unlimited times for one (1) month.
83
- **Supported Languages:** English
84
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
85
-
86
  with st.sidebar:
87
  st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
88
  code = '''
@@ -119,8 +104,7 @@ category_mapping = {
119
  "Deductions": ["Tax", "Deductions"],
120
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
121
  "Legal & Compliance": ["Offer_letter", "Agreement"],
122
- "Professional_Development": ["Certification", "Skill"]
123
- }
124
 
125
  # --- Model Loading ---
126
  @st.cache_resource
@@ -131,7 +115,6 @@ def load_ner_model():
131
  except Exception as e:
132
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
133
  st.stop()
134
-
135
  model = load_ner_model()
136
 
137
  # Flatten the mapping to a single dictionary
@@ -188,7 +171,6 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
188
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
189
  else:
190
  st.info(f"No entities found for the '{category_name}' category.")
191
-
192
  with st.expander("See Glossary of tags"):
193
  st.write('''
194
  - **text**: ['entity extracted from your text data']
@@ -198,15 +180,12 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
198
  - **start**: ['index of the start of the corresponding entity']
199
  - **end**: ['index of the end of the corresponding entity']
200
  ''')
201
-
202
  st.divider()
203
  st.subheader("Candidate Card", divider="green")
204
  fig_treemap = px.treemap(st.session_state.df_ner, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
205
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
206
  st.plotly_chart(fig_treemap)
207
-
208
-
209
-
210
  dfa = pd.DataFrame(
211
  data={
212
  'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
@@ -220,26 +199,30 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
220
  )
221
  buf = io.BytesIO()
222
  with zipfile.ZipFile(buf, "w") as myzip:
223
- myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
224
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
225
-
226
  with stylable_container(
227
  key="download_button",
228
- css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
229
  ):
230
  st.download_button(
231
  label="Download results and glossary (zip)",
232
  data=buf.getvalue(),
233
  file_name="nlpblogs_results.zip",
234
  mime="application/zip",)
235
-
236
  if comet_initialized:
 
 
 
 
 
237
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
238
  experiment.end()
239
  else:
240
  st.warning("No entities were found in the provided text.")
241
 
242
-
243
  # --- Question Answering Section ---
244
  @st.cache_resource
245
  def load_gliner_model():
@@ -251,9 +234,7 @@ def load_gliner_model():
251
  st.stop()
252
 
253
  qa_model = load_gliner_model()
254
-
255
  st.subheader("Question-Answering", divider="green")
256
-
257
  if 'user_labels' not in st.session_state:
258
  st.session_state.user_labels = []
259
 
@@ -284,9 +265,7 @@ if st.session_state.user_labels:
284
  st.rerun()
285
  else:
286
  st.info("No questions defined yet. Use the input above to add one.")
287
-
288
  st.divider()
289
-
290
  if st.button("Extract Answers"):
291
  if not text.strip():
292
  st.warning("Please enter some text to analyze.")
@@ -317,7 +296,7 @@ if st.button("Extract Answers"):
317
  st.dataframe(df_qa, use_container_width=True)
318
  csv_data = df_qa.to_csv(index=False).encode('utf-8')
319
  with stylable_container(
320
- key="download_button",
321
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
322
  ):
323
  st.download_button(
@@ -326,12 +305,9 @@ if st.button("Extract Answers"):
326
  file_name="nlpblogs_extracted_answers.csv",
327
  mime="text/csv",
328
  )
329
-
330
  if comet_initialized:
331
  experiment.log_metric("processing_time_seconds", elapsed_time)
332
- experiment.log_table("predicted_entities", df)
333
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
334
-
335
  experiment.end()
336
  else:
337
  st.info("No answers were found in the text with the defined questions.")
@@ -342,23 +318,4 @@ if st.button("Extract Answers"):
342
  st.write(f"Error details: {e}")
343
  if comet_initialized:
344
  experiment.log_text(f"Error: {e}")
345
- experiment.end()
346
-
347
-
348
-
349
-
350
-
351
-
352
-
353
-
354
-
355
-
356
-
357
- else:
358
- st.warning("No answers were found for the provided questions.")
359
- if 'df_qa' in st.session_state:
360
- del st.session_state.df_qa
361
- except Exception as e:
362
- st.error(f"An error occurred during answer extraction: {e}")
363
-
364
-
 
1
  import os
2
  os.environ['HF_HOME'] = '/tmp'
 
 
 
3
  import time
4
  import streamlit as st
5
  import pandas as pd
 
13
  from comet_ml import Experiment
14
  import hashlib
15
 
 
 
16
  # --- Page Configuration and UI Elements ---
17
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 
18
  st.markdown(
19
  """
20
  <style>
 
63
  }
64
  </style>
65
  """,
66
+ unsafe_allow_html=True)
 
 
67
  st.subheader("HR.ai", divider="green")
68
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
69
  expander = st.expander("**Important notes**")
70
+ expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
 
 
 
71
  with st.sidebar:
72
  st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
73
  code = '''
 
104
  "Deductions": ["Tax", "Deductions"],
105
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
106
  "Legal & Compliance": ["Offer_letter", "Agreement"],
107
+ "Professional_Development": ["Certification", "Skill"]}
 
108
 
109
  # --- Model Loading ---
110
  @st.cache_resource
 
115
  except Exception as e:
116
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
117
  st.stop()
 
118
  model = load_ner_model()
119
 
120
  # Flatten the mapping to a single dictionary
 
171
  st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
172
  else:
173
  st.info(f"No entities found for the '{category_name}' category.")
 
174
  with st.expander("See Glossary of tags"):
175
  st.write('''
176
  - **text**: ['entity extracted from your text data']
 
180
  - **start**: ['index of the start of the corresponding entity']
181
  - **end**: ['index of the end of the corresponding entity']
182
  ''')
 
183
  st.divider()
184
  st.subheader("Candidate Card", divider="green")
185
  fig_treemap = px.treemap(st.session_state.df_ner, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
186
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
187
  st.plotly_chart(fig_treemap)
188
+ df_ner_results = st.session_state.df_ner.drop(columns=['category']) # Define df_ner_results here
 
 
189
  dfa = pd.DataFrame(
190
  data={
191
  'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
 
199
  )
200
  buf = io.BytesIO()
201
  with zipfile.ZipFile(buf, "w") as myzip:
202
+ myzip.writestr("Summary of the results.csv", df_ner_results.to_csv(index=False)) # Use df_ner_results
203
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
204
+
205
  with stylable_container(
206
  key="download_button",
207
+ css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
208
  ):
209
  st.download_button(
210
  label="Download results and glossary (zip)",
211
  data=buf.getvalue(),
212
  file_name="nlpblogs_results.zip",
213
  mime="application/zip",)
214
+
215
  if comet_initialized:
216
+ experiment = Experiment(
217
+ api_key=COMET_API_KEY,
218
+ workspace=COMET_WORKSPACE,
219
+ project_name=COMET_PROJECT_NAME,
220
+ )
221
  experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
222
  experiment.end()
223
  else:
224
  st.warning("No entities were found in the provided text.")
225
 
 
226
  # --- Question Answering Section ---
227
  @st.cache_resource
228
  def load_gliner_model():
 
234
  st.stop()
235
 
236
  qa_model = load_gliner_model()
 
237
  st.subheader("Question-Answering", divider="green")
 
238
  if 'user_labels' not in st.session_state:
239
  st.session_state.user_labels = []
240
 
 
265
  st.rerun()
266
  else:
267
  st.info("No questions defined yet. Use the input above to add one.")
 
268
  st.divider()
 
269
  if st.button("Extract Answers"):
270
  if not text.strip():
271
  st.warning("Please enter some text to analyze.")
 
296
  st.dataframe(df_qa, use_container_width=True)
297
  csv_data = df_qa.to_csv(index=False).encode('utf-8')
298
  with stylable_container(
299
+ key="download_button_qa",
300
  css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
301
  ):
302
  st.download_button(
 
305
  file_name="nlpblogs_extracted_answers.csv",
306
  mime="text/csv",
307
  )
 
308
  if comet_initialized:
309
  experiment.log_metric("processing_time_seconds", elapsed_time)
310
+ experiment.log_table("predicted_entities", df_qa)
 
 
311
  experiment.end()
312
  else:
313
  st.info("No answers were found in the text with the defined questions.")
 
318
  st.write(f"Error details: {e}")
319
  if comet_initialized:
320
  experiment.log_text(f"Error: {e}")
321
+ experiment.end()