vives's picture
Update app.py
60f1365
"""The following program will read in 2 XL sheets of KP matches and the user will evaluate the quality of the matching"""
import streamlit as st
import ast
import pandas as pd
import random
from time import sleep
# Input widgets: which threshold sheet to load, how many key-phrases to
# sample for review, and the workbook itself.
threshold = st.radio(
    "Select threshold",
    options=["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"],
    index=2,  # pre-select "0.85"
)
num_kp = st.slider(
    "Number of key-phrases to select",
    min_value=10,
    max_value=100,
    value=50,
    step=5,
)
xl1 = st.file_uploader("Choose first file", key="xl1")
# A second uploader is currently disabled:
# xl2 = st.file_uploader("Choose second file", key="xl2")
def merge_dicts(x):
    """Merge every dict held in *x* into a single dict.

    *x* is expected to offer ``dropna()`` (the script passes a pandas Series
    of dicts); missing entries are skipped.  When the same key occurs in
    several dicts, the value from the last one wins.
    """
    merged = {}
    for mapping in x.dropna():
        merged.update(mapping.items())
    return merged
def clean_dict(x):
    """Return the string *x* with every single quote turned into a double quote."""
    single_to_double = str.maketrans("'", '"')
    return x.translate(single_to_double)
if xl1 is not None:
    # Evaluation flow: load the sheet for the selected threshold, keep only the
    # non-direct matches of each key-phrase (KP), show a reproducible sample
    # with one checkbox per candidate match, and report the ticked fraction.
    df1 = pd.read_excel(xl1, sheet_name=f"{threshold} Threshold")

    # Each "Matched KPs" cell holds a dict written out as text; parse it back.
    df1["Matched KPs"] = df1["Matched KPs"].apply(
        lambda cell: ast.literal_eval(clean_dict(cell))
    )
    # Drop "null" placeholders and direct matches (score not below 0.99) in a
    # single pass; the "null" test comes first so strings are never compared
    # against a float.
    df1["Matched KPs"] = df1["Matched KPs"].apply(
        lambda matches: {
            kp: score
            for kp, score in matches.items()
            if score != "null" and score < 0.99
        }
    )
    # Rows whose dict is now empty have nothing left to review.
    df1 = df1[df1["Matched KPs"].apply(len) > 0]

    # The same KP can appear on several rows: merge their dicts into one per
    # KP, then order KPs by how many candidate matches they have.
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False)
    new_df.reset_index(inplace=True)

    with st.form("First excel file"):
        choices = []
        i = 0  # number of checkboxes rendered so far; also used as widget key
        # Never ask sample() for more rows than the frame holds.
        num_kp = min(num_kp, new_df.shape[0])
        for _, r1 in new_df.sample(n=num_kp, random_state=42).iterrows():
            kps1 = r1["Matched KPs"]
            # Direct matches were already removed when the column was parsed,
            # so no second filtering pass is needed here; only guard against a
            # KP that has nothing to display.
            if not kps1:
                continue
            col1, col2 = st.columns(2)
            with col1:
                st.write(r1["KP"])
            with col2:
                curr_keys = list(kps1)
                # Show at most 5 candidates per KP; the fixed seed makes the
                # selection reproducible.
                if len(curr_keys) > 5:
                    random.Random(42).shuffle(curr_keys)
                    curr_keys = curr_keys[:5]
                for kp1 in curr_keys:
                    choices.append(
                        st.checkbox(f"{kp1}: {kps1[kp1]:0.2f}", key=i)
                    )
                    i += 1
            st.markdown("""---""")
        submitted = st.form_submit_button("Submit")
        if submitted:
            accepted = sum(1 for ticked in choices if ticked)
            if i:
                # accepted count, total shown, accepted fraction
                st.write(accepted, i, f"{accepted/i : 0.3f}")
            else:
                # No checkbox was rendered, so the ratio is undefined; report
                # that instead of dividing by zero.
                st.warning("No key-phrase matches were available to evaluate.")