CPo_droid / appStore /excel_convert.py
ppsingh's picture
Update appStore/excel_convert.py
bfa165f verified
# set path
import glob, os, sys;
sys.path.append('../utils')
#import needed libraries
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from st_aggrid import AgGrid
import logging
logger = logging.getLogger(__name__)
from utils.config import get_classifier_params
from io import BytesIO
import xlsxwriter
import plotly.express as px
from pandas.api.types import (
is_categorical_dtype,
is_datetime64_any_dtype,
is_numeric_dtype,
is_object_dtype,
is_list_like)
def to_excel():
if 'key1' in st.session_state:
df = st.session_state['key1']
len_df = len(df)
output = BytesIO()
writer = pd.ExcelWriter(output, engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='rawdata')
def build_sheet(df,name):
df = df[df.keep == True]
df = df.reset_index(drop=True)
df.drop(columns = ['keep'], inplace=True)
df.to_excel(writer,index=False,sheet_name = name)
if 'target_hits' in st.session_state:
target_hits = st.session_state['target_hits']
build_sheet(target_hits[['text','page','keep','MitigationLabel','AdaptationLabel','Sector','Sub-Target']],'Target')
if 'action_hits' in st.session_state:
action_hits = st.session_state['action_hits']
build_sheet(action_hits[['text','page','keep','MitigationLabel','AdaptationLabel','Sector']],'Actions')
if 'policy_hits' in st.session_state:
policy_hits = st.session_state['policy_hits']
build_sheet(policy_hits[['text','page','keep','MitigationLabel','AdaptationLabel','Sector']],'Policy')
if 'plan_hits' in st.session_state:
plan_hits = st.session_state['plan_hits']
build_sheet(plan_hits[['text','page','keep','MitigationLabel','AdaptationLabel','Sector']],'Plans')
workbook = writer.book
writer.close()
processed_data = output.getvalue()
return processed_data
def filter_dataframe(key, cols):
"""
Adds a UI on top of a dataframe to let viewers filter columns
Args:
key: key to look for in session_state
cols: columns to use for filter in that order
Returns:
None
"""
modify = st.checkbox("Add filters")
if not modify:
return
if key not in st.session_state:
return
else:
df = st.session_state[key]
df = df[cols + list(set(df.columns) - set(cols))]
if len(df)==0:
return
modification_container = st.container()
with modification_container:
temp = list(set(cols) -{'page','keep'})
to_filter_columns = st.multiselect("Filter dataframe on", temp)
for column in to_filter_columns:
left, right = st.columns((1, 20))
left.write("↳")
# Treat columns with < 10 unique values as categorical
if is_categorical_dtype(df[column]):
# st.write(type(df[column][0]), column)
user_cat_input = right.multiselect(
f"Values for {column}",
df[column].unique(),
default=list(df[column].unique()),
)
df = df[df[column].isin(user_cat_input)]
elif is_numeric_dtype(df[column]):
_min = float(df[column].min())
_max = float(df[column].max())
step = (_max - _min) / 100
user_num_input = right.slider(
f"Values for {column}",
_min,
_max,
(_min, _max),
step=step,
)
df = df[df[column].between(*user_num_input)]
elif is_list_like(df[column]) & (type(df[column][0]) == list) :
list_vals = set(x for lst in df[column].tolist() for x in lst)
user_multi_input = right.multiselect(
f"Values for {column}",
list_vals,
default=list_vals,
)
df['check'] = df[column].apply(lambda x: any(i in x for i in user_multi_input))
df = df[df.check == True]
df.drop(columns = ['check'],inplace=True)
else:
user_text_input = right.text_input(
f"Substring or regex in {column}",
)
if user_text_input:
df = df[df[column].str.lower().str.contains(user_text_input)]
df = df.reset_index(drop=True)
df = st.data_editor(
df,
column_config={
"keep": st.column_config.CheckboxColumn(
help="Select which rows to keep",
default=False,
)
},
disabled=list(set(df.columns) - {'keep'}),
hide_index=True,
key = 'editor'+key,
)
#("updating target hits....")
# st.write(len(df[df.keep == True]))
st.session_state[key] = df
return