"""Streamlit entry point for the Vulnerability Analysis app."""

import streamlit as st
import os
import subprocess
import sys
import pkg_resources

# Workaround for the managed environment's package loading order: the exact
# versions below are (re)installed at startup when they are not already
# present.  One table drives both the "is it installed?" check and the pip
# requirement, so the checked version and the installed version cannot drift
# apart (previously `spaces` was checked against 0.12.0 but installed as
# 0.17.0, so the check could never be satisfied by the install).
PINNED_PACKAGES = {
    "spaces": "0.17.0",
    "pydantic": "1.8.2",
    "typer": "0.4.0",
}


def is_installed(package_name, version):
    """Return True if *package_name* is installed at exactly *version*.

    Args:
        package_name: Distribution name to look up.
        version: Exact version string the installed distribution must have.

    Returns:
        True when the distribution is found and its version string equals
        *version*; False when the version differs or the distribution is
        not installed.
    """
    try:
        pkg = pkg_resources.get_distribution(package_name)
    except pkg_resources.DistributionNotFound:
        return False
    return pkg.version == version


# shifted from below - this must be the first streamlit call; otherwise: problems
st.set_page_config(page_title = 'Vulnerability Analysis',
                   initial_sidebar_state='expanded', layout="wide")


@st.cache_resource  # cache the function so it's not called every time app.py is triggered
def install_packages():
    """Install every pinned package that is not present at its pinned version.

    All checks are done up front; the missing requirements are then installed
    one after another with the pip that belongs to the running interpreter.
    Installation stops at the first pip failure, matching the former
    ``cmd1 && cmd2`` shell chain.  Failures are not raised.
    """
    missing = [
        f"{name}=={version}"
        for name, version in PINNED_PACKAGES.items()
        if not is_installed(name, version)
    ]
    for requirement in missing:
        # `sys.executable -m pip` guarantees the install lands in the
        # environment this app is running in, not whichever pip is on PATH.
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", requirement],
            check=False,
        )
        if completed.returncode != 0:
            break


# install packages if necessary
install_packages()

# These imports are kept below install_packages(), as in the original,
# presumably so they load against the pinned versions.
import appStore.vulnerability_analysis as vulnerability_analysis
import appStore.target as target_analysis
import appStore.doc_processing as processing
from utils.uploadAndExample import add_upload
from utils.vulnerability_classifier import label_dict
import pandas as pd
import plotly.express as px

#st.set_page_config(page_title = 'Vulnerability Analysis',
#                   initial_sidebar_state='expanded', layout="wide")

with st.sidebar:
    # upload and example doc
    choice = st.sidebar.radio(label = 'Select the Document',
                              help = 'You can upload the document \
                              or else you can try a example document',
                              options = ('Upload Document', 'Try Example'),
                              horizontal = True)
    add_upload(choice)

with st.container():
    # NOTE(review): the HTML wrapper of this title was lost from the source
    # text; a centred <h2> is reconstructed here because the call passes
    # unsafe_allow_html=True -- confirm against the deployed app.
    st.markdown("<h2 style='text-align: center;'> Vulnerability Analysis 2.0 </h2>",
                unsafe_allow_html=True)
    st.write(' ')

with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        """
        The Vulnerability Analysis App is an open-source\
        digital tool which aims to assist policy analysts and \
        other users in extracting and filtering references \
        to different groups in vulnerable situations from public documents. \
        We use Natural Language Processing (NLP), specifically deep \
        learning-based text representations to search context-sensitively \
        for mentions of the special needs of groups in vulnerable situations
        to cluster them thematically.
        """)

    st.write("""
        What Happens in background?

        - Step 1: Once the document is provided to app, it undergoes *Pre-processing*.\
        In this step the document is broken into smaller paragraphs \
        (based on word/sentence count).
        - Step 2: The paragraphs are then fed to the **Vulnerability Classifier** which detects if
        the paragraph contains any or multiple references to vulnerable groups.
        """)

    st.write("")

# Define the apps used
apps = [processing.app, vulnerability_analysis.app, target_analysis.app]

if st.button("Analyze Document"):
    prg = st.progress(0.0)
    # Run each stage in order and advance the bar by an equal share per stage;
    # dividing by len(apps) makes the final value exactly 1.0.
    for step, func in enumerate(apps, start=1):
        func()
        prg.progress(step / len(apps))

# If there is data stored
# NOTE(review): reconstructed at top level (not under the button branch) so
# results stay visible on reruns -- confirm against the deployed app.
if 'key0' in st.session_state:
    vulnerability_analysis.vulnerability_display()
    #target_analysis.target_display()

# ###################################################################

# #with st.sidebar:
# #    topic = st.radio(
# #                    "Which category you want to explore?",
# #                    (['Vulnerability', 'Concrete targets/actions/measures']))

# #if topic == 'Vulnerability':

# # Assign dataframe a name
# df_vul = st.session_state['key0']
# st.write(df_vul)

# col1, col2 = st.columns([1,1])

# with col1:
#     # Header
#     st.subheader("Explore references to vulnerable groups:")

#     # Text
#     num_paragraphs = len(df_vul['Vulnerability Label'])
#     num_references = df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x).sum()

#     st.markdown(f"""The document contains a
#             total of {num_paragraphs} paragraphs.
#             We identified {num_references}
#             references to vulnerable groups.
#
#             In the pie chart on the right you can see the distribution of the different
#             groups defined. For a more detailed view in the text, see the paragraphs and
#             their respective labels in the table below.""", unsafe_allow_html=True)

# with col2:
#     ### Bar chart
#
#     # Create a df that stores all the labels
#     df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])

#     # Count how often each label appears in the "Vulnerability Labels" column
#     group_counts = {}

#     # Iterate through each sublist
#     for index, row in df_vul.iterrows():

#         # Iterate through each group in the sublist
#         for sublist in row['Vulnerability Label']:

#             # Update the count in the dictionary
#             group_counts[sublist] = group_counts.get(sublist, 0) + 1

#     # Create a new dataframe from group_counts
#     df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])

#     # Merge the label counts with the df_label DataFrame
#     df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
#     st.write("df_label_count")

#     # # Configure graph
#     # fig = px.pie(df_labels,
#     #              names="Label",
#     #              values="Count",
#     #              title='Label Counts',
#     #              hover_name="Count",
#     #              color_discrete_sequence=px.colors.qualitative.Plotly
#     # )

#     # #Show plot
#     # st.plotly_chart(fig, use_container_width=True)

# # ### Table
# st.table(df_vul[df_vul['Vulnerability Label'] != 'Other'])

# vulnerability_analysis.vulnerability_display()
# elif topic == 'Action':
#     policyaction.action_display()
# else:
#     policyaction.policy_display()
#st.write(st.session_state.key0)