"""Streamlit app for manually rating the quality of key-phrase (KP) matches.

The user uploads an Excel workbook, picks a similarity threshold (which
selects the worksheet named "<threshold> Threshold") and ticks the matches
they consider good.  On submit the app reports the number of ticked
matches, the number of matches shown and their ratio.

Only one workbook is read at the moment.
"""
import ast
import random
from time import sleep

import pandas as pd
import streamlit as st

# Matches scoring at or above this value are treated as direct matches and
# are not shown for review.
DIRECT_MATCH_SCORE = 0.99
# Upper bound on the number of matches displayed for a single key-phrase.
MAX_MATCHES_PER_KP = 5
# Fixed seed: Streamlit re-runs the whole script on every interaction, so the
# sampled rows and shuffled matches must be identical on each run for the
# checkboxes to stay attached to the same items.
SEED = 42


def merge_dicts(x):
    """Merge every non-null dict in *x* into a single dict.

    Used as a groupby aggregator; when the same key occurs in several
    dicts, the value from the last one wins.
    """
    return {k: v for d in x.dropna() for k, v in d.items()}


def clean_dict(x):
    """Return *x* with every single quote replaced by a double quote."""
    return x.replace("'", '"')


def _parse_matches(raw):
    """Parse the text of a "Matched KPs" cell into a dict.

    The raw text is tried first because blindly swapping quote characters
    corrupts key-phrases that contain an apostrophe.  The quote-normalised
    form is only used as a fallback for cells the raw parse rejects.
    """
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return ast.literal_eval(clean_dict(raw))


def _keep_reviewable(matches):
    """Drop "null" placeholders and direct matches from a matches dict."""
    # The "null" test must come first: it short-circuits before the string
    # placeholder could be compared with a float.
    return {
        kp: score
        for kp, score in matches.items()
        if score != "null" and score < DIRECT_MATCH_SCORE
    }


threshold = st.radio(
    "Select threshold", ["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"], 2
)
num_kp = st.slider(
    "Number of key-phrases to select",
    min_value=10,
    max_value=100,
    value=50,
    step=5,
)
xl1 = st.file_uploader("Choose first file", key="xl1")

if xl1 is not None:
    df1 = pd.read_excel(xl1, sheet_name=f"{threshold} Threshold")

    # Cells hold dict literals as text: parse them, then keep only the
    # matches that actually need a human judgement.
    df1["Matched KPs"] = (
        df1["Matched KPs"].apply(_parse_matches).apply(_keep_reviewable)
    )
    # Rows left with no reviewable match are dropped.
    df1 = df1[df1["Matched KPs"].apply(bool)]

    # Merge the dicts of rows that share the same KP.
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False)
    new_df = new_df.reset_index()

    with st.form("First excel file"):
        choices = []
        # Never ask for more rows than are available.
        num_kp = min(num_kp, new_df.shape[0])

        for _, row in new_df.sample(n=num_kp, random_state=SEED).iterrows():
            matches = row["Matched KPs"]
            if not matches:
                continue

            col1, col2 = st.columns(2)
            with col1:
                st.write(row["KP"])
            with col2:
                shown_keys = list(matches)
                # Too many matches: show a reproducible random subset.
                if len(shown_keys) > MAX_MATCHES_PER_KP:
                    random.Random(SEED).shuffle(shown_keys)
                    shown_keys = shown_keys[:MAX_MATCHES_PER_KP]
                for kp in shown_keys:
                    # The running count doubles as the unique widget key.
                    choices.append(
                        st.checkbox(
                            f"{kp}: {matches[kp]:0.2f}", key=len(choices)
                        )
                    )
            st.markdown("""---""")

        submitted = st.form_submit_button("Submit")
        if submitted:
            total = len(choices)
            accepted = sum(1 for ticked in choices if ticked)
            if total == 0:
                # Nothing was displayed, so the ratio is undefined.
                st.warning("No key-phrase matches were available to rate.")
            else:
                st.write(accepted, total, f"{accepted / total: 0.3f}")