Spaces:

AIEcosystem
/

HR.ai

Sleeping

App Files Community

AIEcosystem committed on Sep 8

Commit

d1553d4

verified ·

1 Parent(s): 8f31f8b

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +16 -59

src/streamlit_app.py CHANGED Viewed

@@ -1,8 +1,5 @@
 import os
 os.environ['HF_HOME'] = '/tmp'
 import time
 import streamlit as st
 import pandas as pd
@@ -16,11 +13,8 @@ from gliner import GLiNER
 from comet_ml import Experiment
 import hashlib
 # --- Page Configuration and UI Elements ---
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 st.markdown(
     """
     <style>
@@ -69,20 +63,11 @@ st.markdown(
     }
     </style>
     """,
-    unsafe_allow_html=True
-)
 st.subheader("HR.ai", divider="green")
 st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
-expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
-Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
-**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
-**Usage Limits:** You can request results unlimited times for one (1) month.
-**Supported Languages:** English
-**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 with st.sidebar:
     st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
     code = '''
@@ -119,8 +104,7 @@ category_mapping = {
     "Deductions": ["Tax", "Deductions"],
     "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
     "Legal & Compliance": ["Offer_letter", "Agreement"],
-    "Professional_Development": ["Certification", "Skill"]
-}
 # --- Model Loading ---
 @st.cache_resource
@@ -131,7 +115,6 @@ def load_ner_model():
     except Exception as e:
         st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
         st.stop()
 model = load_ner_model()
 # Flatten the mapping to a single dictionary
@@ -188,7 +171,6 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
                 st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
             else:
                 st.info(f"No entities found for the '{category_name}' category.")
     with st.expander("See Glossary of tags"):
         st.write('''
         - **text**: ['entity extracted from your text data']
@@ -198,15 +180,12 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
         - **start**: ['index of the start of the corresponding entity']
         - **end**: ['index of the end of the corresponding entity']
         ''')
     st.divider()
     st.subheader("Candidate Card", divider="green")
     fig_treemap = px.treemap(st.session_state.df_ner, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
     fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
     st.plotly_chart(fig_treemap)
     dfa = pd.DataFrame(
         data={
             'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
@@ -220,26 +199,30 @@ if 'df_ner' in st.session_state and not st.session_state.df_ner.empty:
     )
     buf = io.BytesIO()
     with zipfile.ZipFile(buf, "w") as myzip:
-        myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
         myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
     with stylable_container(
         key="download_button",
-                    css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
     ):
         st.download_button(
         label="Download results and glossary (zip)",
         data=buf.getvalue(),
         file_name="nlpblogs_results.zip",
         mime="application/zip",)
     if comet_initialized:
         experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
         experiment.end()
     else:
         st.warning("No entities were found in the provided text.")
 # --- Question Answering Section ---
 @st.cache_resource
 def load_gliner_model():
@@ -251,9 +234,7 @@ def load_gliner_model():
         st.stop()
 qa_model = load_gliner_model()
 st.subheader("Question-Answering", divider="green")
 if 'user_labels' not in st.session_state:
     st.session_state.user_labels = []
@@ -284,9 +265,7 @@ if st.session_state.user_labels:
                 st.rerun()
 else:
     st.info("No questions defined yet. Use the input above to add one.")
 st.divider()
 if st.button("Extract Answers"):
     if not text.strip():
         st.warning("Please enter some text to analyze.")
@@ -317,7 +296,7 @@ if st.button("Extract Answers"):
                     st.dataframe(df_qa, use_container_width=True)
                     csv_data = df_qa.to_csv(index=False).encode('utf-8')
                     with stylable_container(
-                        key="download_button",
                         css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
                     ):
                         st.download_button(
@@ -326,12 +305,9 @@ if st.button("Extract Answers"):
                             file_name="nlpblogs_extracted_answers.csv",
                             mime="text/csv",
                         )
                     if comet_initialized:
                         experiment.log_metric("processing_time_seconds", elapsed_time)
-                        experiment.log_table("predicted_entities", df)
-                        experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
                         experiment.end()
                 else:
                     st.info("No answers were found in the text with the defined questions.")
@@ -342,23 +318,4 @@ if st.button("Extract Answers"):
                 st.write(f"Error details: {e}")
                 if comet_initialized:
                     experiment.log_text(f"Error: {e}")
-                    experiment.end()
-                else:
-                    st.warning("No answers were found for the provided questions.")
-                    if 'df_qa' in st.session_state:
-                        del st.session_state.df_qa
-            except Exception as e:
-                st.error(f"An error occurred during answer extraction: {e}")

 import os
 os.environ['HF_HOME'] = '/tmp'
 import time
 import streamlit as st
 import pandas as pd
 from comet_ml import Experiment
 import hashlib
 # --- Page Configuration and UI Elements ---
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 st.markdown(
     """
     <style>
     }
     </style>
     """,
+    unsafe_allow_html=True)
 st.subheader("HR.ai", divider="green")
 st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
+expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Supported Languages:** English**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 with st.sidebar:
     st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
     code = '''
     "Deductions": ["Tax", "Deductions"],
     "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
     "Legal & Compliance": ["Offer_letter", "Agreement"],
+    "Professional_Development": ["Certification", "Skill"]}
 # --- Model Loading ---
 @st.cache_resource
     except Exception as e:
         st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
         st.stop()
 model = load_ner_model()
 # Flatten the mapping to a single dictionary
                 st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
             else:
                 st.info(f"No entities found for the '{category_name}' category.")
     with st.expander("See Glossary of tags"):
         st.write('''
         - **text**: ['entity extracted from your text data']
         - **start**: ['index of the start of the corresponding entity']
         - **end**: ['index of the end of the corresponding entity']
         ''')
     st.divider()
     st.subheader("Candidate Card", divider="green")
     fig_treemap = px.treemap(st.session_state.df_ner, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
     fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
     st.plotly_chart(fig_treemap)
+    df_ner_results = st.session_state.df_ner.drop(columns=['category']) # Define df_ner_results here
     dfa = pd.DataFrame(
         data={
             'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
     )
     buf = io.BytesIO()
     with zipfile.ZipFile(buf, "w") as myzip:
+        myzip.writestr("Summary of the results.csv", df_ner_results.to_csv(index=False)) # Use df_ner_results
         myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
     with stylable_container(
         key="download_button",
+        css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
     ):
         st.download_button(
         label="Download results and glossary (zip)",
         data=buf.getvalue(),
         file_name="nlpblogs_results.zip",
         mime="application/zip",)
     if comet_initialized:
+        experiment = Experiment(
+            api_key=COMET_API_KEY,
+            workspace=COMET_WORKSPACE,
+            project_name=COMET_PROJECT_NAME,
+        )
         experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
         experiment.end()
     else:
         st.warning("No entities were found in the provided text.")
 # --- Question Answering Section ---
 @st.cache_resource
 def load_gliner_model():
         st.stop()
 qa_model = load_gliner_model()
 st.subheader("Question-Answering", divider="green")
 if 'user_labels' not in st.session_state:
     st.session_state.user_labels = []
                 st.rerun()
 else:
     st.info("No questions defined yet. Use the input above to add one.")
 st.divider()
 if st.button("Extract Answers"):
     if not text.strip():
         st.warning("Please enter some text to analyze.")
                     st.dataframe(df_qa, use_container_width=True)
                     csv_data = df_qa.to_csv(index=False).encode('utf-8')
                     with stylable_container(
+                        key="download_button_qa",
                         css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
                     ):
                         st.download_button(
                             file_name="nlpblogs_extracted_answers.csv",
                             mime="text/csv",
                         )
                     if comet_initialized:
                         experiment.log_metric("processing_time_seconds", elapsed_time)
+                        experiment.log_table("predicted_entities", df_qa)
                         experiment.end()
                 else:
                     st.info("No answers were found in the text with the defined questions.")
                 st.write(f"Error details: {e}")
                 if comet_initialized:
                     experiment.log_text(f"Error: {e}")
+                    experiment.end()