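"""Streamlit app for qualitative analysis of retrieval run files.

Load a preloaded dataset (BEIR / ir_datasets) or a custom corpus/queries/qrels,
score one or two run files with pytrec_eval, and browse per-query scores,
relevant documents, and top-ranked documents side by side.
"""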
import streamlit as st
import os
import pathlib
import beir
from beir import util
from beir.datasets.data_loader import GenericDataLoader
import pytrec_eval
import pandas as pd
from collections import defaultdict
import json
import copy
import plotly.express as px
from constants import ALL_DATASETS, ALL_METRICS
from dataset_loading import get_dataset, load_run, load_local_qrels, load_local_corpus, load_local_queries
from analysis import create_boxplot_1df, create_boxplot_2df, create_boxplot_diff, get_model, prep_func
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
st.set_page_config(layout="wide")
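# -1 means no instance is selected (show the Overview page)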
if 'cur_instance_num' not in st.session_state:
st.session_state.cur_instance_num = -1
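# Bucket a per-query score into none (0), perfect (1), or in-between for the breakdown pie charts.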
def update_details(run_details, run_score):
if run_score == 0:
run_details["none"] += 1
elif run_score == 1:
run_details["perfect"] += 1
else:
run_details["inbetween"] += 1
return run_details
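# Analysis can start once run 1 is uploaded and either a preloaded dataset is chosen
# or, for "custom", qrels, queries, and corpus are all provided.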
def check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
if run1_file is not None and dataset_name not in ["", None, "custom"]:
return True
elif run1_file is not None and dataset_name == "custom":
if qrels is not None and queries is not None and corpus is not None:
return True
return False
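# Stop the app with an error if an expansion type was selected but no matching file was uploaded.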
def validate(config_option, file_loaded):
if config_option != "None" and file_loaded is None:
st.error("Please upload a file for " + config_option)
st.stop()
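# Combine the original text with its expansion according to the selected strategy.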
def combine(text_og, text_new, combine_type):
if combine_type == "None":
return text_og
elif combine_type == "Append":
return text_og + " <APPEND> " + text_new
elif combine_type == "Prepend":
return text_new + " <PREPEND> " + text_og
elif combine_type == "Replace":
return text_new
else:
raise ValueError("Invalid combine type")
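# Sidebar: dataset and metric selection, optional custom uploads, run files, and analysis options.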
with st.sidebar:
st.title("Options")
dataset_name = st.selectbox("Select a preloaded dataset or upload your own (note: some datasets are large/slow)", tuple(ALL_DATASETS))
if st.checkbox("Choose fields (applies to IR_Datasets only)"):
input_fields_doc = st.text_input("Type the name of the doc fields to get, with commas (blank=all)")
if input_fields_doc in ["", None]:
input_fields_doc = None
input_fields_query = st.sidebar.text_input("Type the name of the query fields to get, with commas (blank=all)")
if input_fields_query in ["", None]:
input_fields_query = None
else:
input_fields_doc = None
input_fields_query = None
metric_name = st.selectbox("Select a metric", tuple(ALL_METRICS))
if dataset_name == "custom":
st.header("Upload corpus")
corpus_file = st.file_uploader("Choose a file", key="corpus")
corpus = load_local_corpus(corpus_file)
st.header("Upload queries")
queries_file = st.file_uploader("Choose a file", key="queries")
queries = load_local_queries(queries_file)
st.header("Upload qrels")
qrels_file = st.file_uploader("Choose a file", key="qrels")
qrels = load_local_qrels(qrels_file)
else:
qrels = None
queries = None
corpus = None
x = st.header('Upload a run file')
run1_file = st.file_uploader("Choose a file", key="run1")
y = st.header("Upload a second run file")
run2_file = st.file_uploader("Choose a file", key="run2")
z = st.header("Analysis Options")
# slider for how many top-ranked docs to show per query
top_n = st.slider("Top N Ranked Docs", 1, 100, 3)
n_relevant_docs = st.slider("Number of relevant docs", 1, 100, 3)
incorrect_only = st.checkbox("Show only incorrect instances", value=False)
one_better_than_two = st.checkbox("Show only instances where run 1 is better than run 2", value=False)
two_better_than_one = st.checkbox("Show only instances where run 2 is better than run 1", value=False)
use_model_saliency = st.checkbox("Use model saliency (slow!)", value=False)
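# Load the selected MonoT5 model once and build the saliency function here so every document view below can reuse it.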
if use_model_saliency:
# choose from a list of models
model_name = st.selectbox("Choose from a list of models", ["MonoT5-Small", "MonoT5-3B"])
model, formatter = get_model(model_name)
get_saliency = prep_func(model, formatter)
advanced_options1 = st.checkbox("Show advanced options for Run 1", value=False)
doc_expansion1 = doc_expansion2 = None
query_expansion1 = query_expansion2 = None
run1_uses_query_expansion = "None"
run1_uses_doc_expansion = "None"
run2_uses_query_expansion = "None"
run2_uses_doc_expansion = "None"
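# Advanced options: per-run query/document expansion files and how each run combined them.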
if advanced_options1:
doc_header = st.header("Upload a Document Expansion file")
doc_expansion_file = st.file_uploader("Choose a file", key="doc_expansion")
if doc_expansion_file is not None:
doc_expansion1 = load_local_corpus(doc_expansion_file)
query_header = st.header("Upload a Query Expansion file")
query_expansion_file = st.file_uploader("Choose a file", key="query_expansion")
if query_expansion_file is not None:
query_expansion1 = load_local_queries(query_expansion_file)
run1_uses_query_expansion = st.selectbox("Type of query expansion used in run 1", ("None", "Append", "Prepend", "Replace"))
run1_uses_doc_expansion = st.selectbox("Type of document expansion used in run 1", ("None", "Append", "Prepend", "Replace"))
validate(run1_uses_query_expansion, query_expansion_file)
validate(run1_uses_doc_expansion, doc_expansion_file)
advanced_options2 = st.checkbox("Show advanced options for Run 2", value=False)
if advanced_options2:
doc_header = st.header("Upload a Document Expansion file")
doc_expansion_file = st.file_uploader("Choose a file", key="doc_expansion2")
if doc_expansion_file is not None:
doc_expansion2 = load_local_corpus(doc_expansion_file)
query_header = st.header("Upload a Query Expansion file")
query_expansion_file = st.file_uploader("Choose a file", key="query_expansion2")
if query_expansion_file is not None:
query_expansion2 = load_local_queries(query_expansion_file)
run2_uses_query_expansion = st.selectbox("Type of query expansion used in run 2", ("None", "Append", "Prepend", "Replace"))
run2_uses_doc_expansion = st.selectbox("Type of document expansion used in run 2", ("None", "Append", "Prepend", "Replace"))
validate(run2_uses_query_expansion, query_expansion_file)
validate(run2_uses_doc_expansion, doc_expansion_file)
# everything hinges on the run being uploaded, so do that first
# init_title = st.title("Upload Run and Choose Details")
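# Run files are parsed by load_run; the standard TREC run format is assumed:
#   qid Q0 doc_id rank score run_tag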
if run1_file is not None:
run1, run1_pandas = load_run(run1_file)
# do everything, now that we have the run file
if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
# init_title = st.title("Analysis")
# don't load these til a run is given
if dataset_name != "custom":
corpus, queries, qrels = get_dataset(dataset_name, input_fields_doc, input_fields_query)
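# Evaluate run 1 on all measures pytrec_eval supports; the qrels are deep-copied so the originals stay untouched.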
evaluator = pytrec_eval.RelevanceEvaluator(
copy.deepcopy(qrels), pytrec_eval.supported_measures)
results1 = evaluator.evaluate(run1) # dict of instance then metrics then values
average_run1_score = pytrec_eval.compute_aggregated_measure(metric_name, [query_measures[metric_name] for query_measures in results1.values()])
if len(results1) == 0:
# alert and stop
st.error("Run file is empty")
st.stop()
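# Optionally score a second run the same way for side-by-side comparison.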
if run2_file is not None:
run2, run2_pandas = load_run(run2_file)
# NOTE: will fail if run1 is not uploaded
evaluator2 = pytrec_eval.RelevanceEvaluator(
copy.deepcopy(qrels), pytrec_eval.supported_measures)
results2 = evaluator2.evaluate(run2)
average_run2_score = pytrec_eval.compute_aggregated_measure(metric_name, [query_measures[metric_name] for query_measures in results2.values()])
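# Two-column layout: instance navigation and score breakdowns on the left, per-instance details on the right.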
col1, col2 = st.columns([1, 3], gap="large")
# incorrect = 0
is_better_run1_count = 0
is_better_run2_count = 0
is_same_count = 0
run1_details = {"none": 0, "perfect": 0, "inbetween": 0}
run2_details = {"none": 0, "perfect": 0, "inbetween": 0}
with col1:
st.title("Instances")
if run1_file is not None:
set_of_cols = set(run1_pandas.qid.tolist())
container_for_nav = st.container()
name_of_columns = sorted([item for item in set_of_cols])
instances_to_use = []
# st.divider()
for idx in range(len(name_of_columns)):
is_incorrect = False
is_better_run1 = False
is_better_run2 = False
run1_score = results1[str(name_of_columns[idx])][metric_name] if idx else 1
run1_details = update_details(run1_details, run1_score)
if run2_file is not None:
run2_score = results2[str(name_of_columns[idx])][metric_name] if idx else 1
run2_details = update_details(run2_details, run2_score)
if run1_score == 0 or run2_score == 0:
is_incorrect = True
if run1_score > run2_score:
is_better_run1_count += 1
is_better_run1 = True
elif run2_score > run1_score:
is_better_run2_count += 1
is_better_run2 = True
else:
is_same_count += 1
if not incorrect_only or is_incorrect:
if not one_better_than_two or is_better_run1:
if not two_better_than_one or is_better_run2:
# check = st.checkbox(f"{idx}. " + str(name_of_columns[idx]), key=f"{idx}check")
# st.divider()
instances_to_use.append(name_of_columns[idx])
else:
if run1_score == 0:
is_incorrect = True
if not incorrect_only or is_incorrect:
# check = st.checkbox(f"{idx}. " + str(name_of_columns[idx]), key=f"{idx}check")
# st.divider()
instances_to_use.append(name_of_columns[idx])
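# Callbacks that keep the index number input and the ID selectbox pointing at the same instance.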
def sync_from_drop():
if st.session_state.selectbox_instance == "Overview":
st.session_state.number_of_col = -1
st.session_state.cur_instance_num = -1
else:
index_of_obj = name_of_columns.index(st.session_state.selectbox_instance)
# print("Index of obj: ", index_of_obj, type(index_of_obj))
st.session_state.number_of_col = index_of_obj
st.session_state.cur_instance_num = index_of_obj
def sync_from_number():
st.session_state.cur_instance_num = st.session_state.number_of_col
# print("Session state number of col: ", st.session_state.number_of_col, type(st.session_state.number_of_col))
if st.session_state.number_of_col == -1:
st.session_state.selectbox_instance = "Overview"
else:
st.session_state.selectbox_instance = name_of_columns[st.session_state.number_of_col]
number_of_col = container_for_nav.number_input(min_value=-1, step=1, max_value=len(instances_to_use) - 1, on_change=sync_from_number, label=f"Select instance by index (up to **{len(instances_to_use) - 1}**)", key="number_of_col")
selectbox_instance = container_for_nav.selectbox("Select instance by ID", ["Overview"] + name_of_columns, on_change=sync_from_drop, key="selectbox_instance")
st.divider()
# make pie plot showing incorrect vs correct
st.header("Breakdown")
if run2_file is None:
overall_scores_container = st.container()
left_score, right_score = overall_scores_container.columns([1, 1])
left_score.metric(label=f"Run 1 {metric_name}", value=round(average_run1_score, 3))
right_score.metric(label="#Q", value=len(results1))
plotly_pie_chart = px.pie(names=["Perfect", "Inbetween", "None"], values=[run1_details["perfect"], run1_details["inbetween"], run1_details["none"]])
st.write("Run 1 Scores")
plotly_pie_chart.update_traces(showlegend=False, selector=dict(type='pie'), textposition='inside', textinfo='percent+label')
st.plotly_chart(plotly_pie_chart, use_container_width=True)
else:
overall_scores_container = st.container()
left_score, right_score = overall_scores_container.columns([1, 1])
left_score.metric(label=f"Run 1 {metric_name}", value=round(average_run1_score, 3))
right_score.metric(label=f"Run 2 {metric_name}", value=round(average_run2_score, 3))
if st.checkbox("Show Run 1 vs Run 2", value=True):
plotly_pie_chart = px.pie(names=["Run 1 Better", "Run 2 Better", "Tied"], values=[is_better_run1_count, is_better_run2_count, is_same_count])
plotly_pie_chart.update_traces(showlegend=False, selector=dict(type='pie'), textposition='inside', textinfo='percent+label')
st.plotly_chart(plotly_pie_chart, use_container_width=True)
if st.checkbox("Show Run 1 Breakdown"):
plotly_pie_chart_run1 = px.pie(names=["Perfect", "Inbetween", "None"], values=[run1_details["perfect"], run1_details["inbetween"], run1_details["none"]])
plotly_pie_chart_run1.update_traces(showlegend=False, selector=dict(type='pie'), textposition='inside', textinfo='percent+label')
st.plotly_chart(plotly_pie_chart_run1, use_container_width=True)
if st.checkbox("Show Run 2 Breakdown"):
plotly_pie_chart_run2 = px.pie(names=["Perfect", "Inbetween", "None"], values=[run2_details["perfect"], run2_details["inbetween"], run2_details["none"]])
plotly_pie_chart_run2.update_traces(showlegend=False, selector=dict(type='pie'), textposition='inside', textinfo='percent+label')
st.plotly_chart(plotly_pie_chart_run2, use_container_width=True)
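# Right column: details for the selected instance, or overview plots when "Overview" is selected.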
with col2:
# st.title(f"Information ({len(checkboxes) - 1}/{len(name_of_columns) - 1})")
### Only one run file
if run1_file is not None and run2_file is None:
# get instance number
inst_index = number_of_col
if inst_index >= 0:
inst_num = instances_to_use[inst_index]
st.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Run 1</h1>", unsafe_allow_html=True)
container = st.container()
rank_col, score_col, id_col = container.columns([2,1,3])
id_col.metric("ID", inst_num)
score_col.metric(metric_name, results1[str(inst_num)][metric_name])
# st.subheader(f"ID")
# st.markdown(inst_num)
st.divider()
st.subheader(f"Query")
if run1_uses_query_expansion != "None":
show_orig_rel = st.checkbox("Show Original Query", key=f"{inst_index}reloriguery", value=False)
query_text_og = queries[str(inst_num)]
if query_expansion1 is not None and run1_uses_query_expansion != "None" and not show_orig_rel:
alt_text = query_expansion1[str(inst_num)]
query_text = combine(query_text_og, alt_text, run1_uses_query_expansion)
else:
query_text = query_text_og
st.markdown(query_text)
st.divider()
## Documents
# relevant
relevant_docs = list(qrels[str(inst_num)].keys())[:n_relevant_docs]
doc_texts = [(doc_id, corpus[doc_id]["title"] if "title" in corpus[doc_id] else "", corpus[doc_id]["text"]) for doc_id in relevant_docs]
st.subheader("Relevant Documents")
if doc_expansion1 is not None and run1_uses_doc_expansion != "None":
show_orig_rel = st.checkbox("Show Original Relevant Doc(s)", key=f"{inst_index}relorig", value=False)
for (docid, title, text) in doc_texts:
if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel:
alt_text = doc_expansion1[docid]["text"]
text = combine(text, alt_text, run1_uses_doc_expansion)
if use_model_saliency:
if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency", value=False):
st.markdown(get_saliency(query_text, doc_texts),unsafe_allow_html=True)
else:
st.text_area(f"{docid}:", text)
else:
st.text_area(f"{docid}:", text)
# find the rank run 1 gave to each relevant document ("-" if it was not retrieved)
ranks = []
for docid in relevant_docs:
pred_doc = run1_pandas[run1_pandas.doc_id.isin([docid])]
rank_pred = pred_doc[pred_doc.qid == str(inst_num)]
if rank_pred.empty:
ranks.append("-")
else:
ranks.append(rank_pred.iloc[0]["rank"])
# st.subheader("Ranked of Documents")
# st.markdown(f"Rank: {rank_pred}")
ranking_str = ",".join([str(item) for item in ranks])
if ranking_str == "":
ranking_str = "-"
rank_col.metric(f"Rank of Relevant Doc(s)", ranking_str)
# breakpoint()
st.divider()
# top ranked
if st.checkbox('Show top ranked documents', key=f"{inst_index}top-1run"):
st.subheader("Top N Ranked Documents")
if doc_expansion1 is not None and run1_uses_doc_expansion != "None":
show_orig_rel_ranked = st.checkbox("Show Original Ranked Doc(s)", key=f"{inst_index}relorigdocs", value=False)
run1_top_n = run1_pandas[run1_pandas.qid == str(inst_num)][:top_n]
run1_top_n_docs = [corpus[str(doc_id)] for doc_id in run1_top_n.doc_id.tolist()]
if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel_ranked:
run1_top_n_docs_alt = [doc_expansion1[str(doc_id)] for doc_id in run1_top_n.doc_id.tolist()]
for d_idx, doc in enumerate(run1_top_n_docs):
alt_text = run1_top_n_docs_alt[d_idx]["text"]
doc_text = combine(doc["text"], alt_text, run1_uses_doc_expansion)
if use_model_saliency:
if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency", value=False):
st.markdown(get_saliency(query_text, doc_text),unsafe_allow_html=True)
else:
st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}")
else:
st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}")
else:
for d_idx, doc in enumerate(run1_top_n_docs):
if use_model_saliency:
if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked", value=False):
st.markdown(get_saliency(query_text, doc),unsafe_allow_html=True)
else:
st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}")
else:
st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}")
st.divider()
# none checked
elif inst_index < 0:
st.title("Overview")
st.subheader(f"Scores of {metric_name}")
plotly_chart = create_boxplot_1df(results1, metric_name)
st.plotly_chart(plotly_chart)
## Both run files available
elif run1_file is not None and run2_file is not None:
has_check = False
container_top = st.container()
# get instance number
inst_index = number_of_col
if inst_index >= 0:
inst_num = instances_to_use[inst_index]
col_run1, col_run2 = container_top.columns([1,1])
col_run1.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Run 1</h1>", unsafe_allow_html=True)
col_run2.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Run 2</h1>", unsafe_allow_html=True)
container_overview = st.container()
rank_col1, score_col1, rank_col2, score_col2 = container_overview.columns([2,1,2,1])
# id_col1.metric("", "")
score_col1.metric("Run 1 " + metric_name, results1[str(inst_num)][metric_name])
score_col2.metric("Run 2 " + metric_name, results2[str(inst_num)][metric_name])
st.divider()
st.subheader(f"Query")
container_two_query = st.container()
col_run1, col_run2 = container_two_query.columns(2, gap="medium")
query_text_og = queries[str(inst_num)]
if run1_uses_query_expansion != "None" and run2_uses_query_expansion != "None":
alt_text1 = query_expansion1[str(inst_num)]
alt_text2 = query_expansion2[str(inst_num)]
combined_text1 = combine(query_text_og, alt_text1, run1_uses_query_expansion)
combined_text2 = combine(query_text_og, alt_text2, run2_uses_query_expansion)
col_run1.markdown(combined_text1)
col_run2.markdown(combined_text2)
query_text1 = combined_text1
query_text2 = combined_text2
elif run1_uses_query_expansion != "None":
alt_text = query_expansion1[str(inst_num)]
combined_text1 = combine(query_text_og, alt_text, run1_uses_query_expansion)
col_run1.markdown(combined_text1)
col_run2.markdown(query_text_og)
query_text1 = combined_text1
query_text2 = query_text_og
elif run2_uses_query_expansion != "None":
alt_text = query_expansion2[str(inst_num)]
combined_text2 = combine(query_text_og, alt_text, run2_uses_query_expansion)
col_run1.markdown(query_text_og)
col_run2.markdown(combined_text2)
query_text1 = query_text_og
query_text2 = combined_text2
else:
query_text = query_text_og
col_run1.markdown(query_text)
col_run2.markdown(query_text)
query_text1 = query_text
query_text2 = query_text
st.divider()
## Documents
# relevant
st.subheader("Relevant Documents")
container_two_docs_rel = st.container()
col_run1, col_run2 = container_two_docs_rel.columns(2, gap="medium")
relevant_docs = list(qrels[str(inst_num)].keys())[:n_relevant_docs]
relevant_score = {ind_doc_id: qrels[str(inst_num)][ind_doc_id] for ind_doc_id in relevant_docs}
doc_texts = [(doc_id, corpus[doc_id]["title"] if "title" in corpus[doc_id] else "", corpus[doc_id]["text"], relevant_score[doc_id]) for doc_id in relevant_docs]
if doc_expansion1 is not None and run1_uses_doc_expansion != "None":
show_orig_rel1 = col_run1.checkbox("Show Original Relevant Doc(s)", key=f"{inst_index}relorig_run1", value=False)
if doc_expansion2 is not None and run2_uses_doc_expansion != "None":
show_orig_rel2 = col_run2.checkbox("Show Original Relevant Doc(s)", key=f"{inst_index}relorig_run2", value=False)
for (docid, title, text, rel_score) in doc_texts:
if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel1:
alt_text = doc_expansion1[docid]["text"]
text = combine(text, alt_text, run1_uses_doc_expansion)
if use_model_saliency:
if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{docid}relevant", value=False):
col_run1.markdown(get_saliency(query_text1, text),unsafe_allow_html=True)
else:
col_run1.text_area(f"{docid} (Rel: {rel_score}):", text, key=f"{inst_num}doc{docid}1")
else:
col_run1.text_area(f"{docid} (Rel: {rel_score}):", text, key=f"{inst_num}doc{docid}1")
for (docid, title, text, rel_score) in doc_texts:
if doc_expansion2 is not None and run2_uses_doc_expansion != "None" and not show_orig_rel2:
alt_text = doc_expansion2[docid]["text"] if docid in doc_expansion2 else "<NOT EXPANDED>"
text = combine(text, alt_text, run2_uses_doc_expansion)
if use_model_saliency:
if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{docid}relevant2", value=False):
col_run2.markdown(get_saliency(query_text2, text),unsafe_allow_html=True)
else:
col_run2.text_area(f"{docid}: (Rel: {rel_score})", text, key=f"{inst_num}doc{docid}2")
else:
col_run2.text_area(f"{docid}: (Rel: {rel_score})", text, key=f"{inst_num}doc{docid}2")
# ranks of the relevant docs in each run
# NOTE: BEIR calls trec_eval, which ranks by score and then doc_id for ties;
# the top-N views below sort the same way so the displayed ranks match the scores
ranks2 = []
for docid in relevant_docs:
pred_doc = run2_pandas[run2_pandas.doc_id.isin([docid])]
rank_pred = pred_doc[pred_doc.qid == str(inst_num)]
if rank_pred.empty:
ranks2.append("-")
else:
ranks2.append(rank_pred.iloc[0]["rank"])
# st.subheader("Ranked of Documents")
# st.markdown(f"Rank: {rank_pred}")
ranking_str2 = ",".join([str(item) for item in ranks2])
if ranking_str2 == "":
ranking_str2 = "-"
rank_col2.metric("Run 2 " + f"Rank of Relevant Doc(s)", ranking_str2)
ranks1 = []
for docid in relevant_docs:
pred_doc = run1_pandas[run1_pandas.doc_id.isin([docid])]
rank_pred = pred_doc[pred_doc.qid == str(inst_num)]
if rank_pred.empty:
ranks1.append("-")
else:
ranks1.append(rank_pred.iloc[0]["rank"])
# st.subheader("Ranked of Documents")
# st.markdown(f"Rank: {rank_pred}")
ranking_str1 = ",".join([str(item) for item in ranks1])
if ranking_str1 == "":
ranking_str1 = "-"
rank_col1.metric("Run 1 " + f"Rank of Relevant Doc(s)", ranking_str1)
st.divider()
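# Side-by-side top-N ranked documents for each run.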
container_two_docs_ranked = st.container()
col_run1, col_run2 = container_two_docs_ranked.columns(2, gap="medium")
if col_run1.checkbox('Show top ranked documents for Run 1', key=f"{inst_index}top-1run"):
col_run1.subheader("Top N Ranked Documents")
if doc_expansion1 is not None and run1_uses_doc_expansion != "None":
show_orig_rel_ranked1 = col_run1.checkbox("Show Original Ranked Doc(s)", key=f"{inst_index}relorigdocs1", value=False)
run1_top_n = run1_pandas[run1_pandas.qid == str(inst_num)].sort_values(["score", "doc_id"], ascending=[False, False])[:top_n]
run1_top_n_docs = [corpus[str(doc_id)] for doc_id in run1_top_n.doc_id.tolist()]
if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel_ranked1:
run1_top_n_docs_alt = [doc_expansion1[str(doc_id)] for doc_id in run1_top_n.doc_id.tolist()]
for d_idx, doc in enumerate(run1_top_n_docs):
alt_text = run1_top_n_docs_alt[d_idx]["text"]
doc_text = combine(doc["text"], alt_text, run1_uses_doc_expansion)
if use_model_saliency:
if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked1", value=False):
col_run1.markdown(get_saliency(query_text1, doc_text),unsafe_allow_html=True)
else:
col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}1")
else:
col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}1")
else:
for d_idx, doc in enumerate(run1_top_n_docs):
if use_model_saliency:
if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked1", value=False):
col_run1.markdown(get_saliency(query_text1, doc),unsafe_allow_html=True)
else:
col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}1")
else:
col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}1")
if col_run2.checkbox('Show top ranked documents for Run 2', key=f"{inst_index}top-2run"):
col_run2.subheader("Top N Ranked Documents")
if doc_expansion2 is not None and run2_uses_doc_expansion != "None":
show_orig_rel_ranked2 = col_run2.checkbox("Show Original Ranked Doc(s)", key=f"{inst_index}relorigdocs2", value=False)
run2_top_n = run2_pandas[run2_pandas.qid == str(inst_num)].sort_values(["score", "doc_id"], ascending=[False, False])[:top_n]
run2_top_n_docs = [corpus[str(doc_id)] for doc_id in run2_top_n.doc_id.tolist()]
if doc_expansion2 is not None and run2_uses_doc_expansion != "None" and not show_orig_rel_ranked2:
run2_top_n_docs_alt = [doc_expansion2[str(doc_id)] for doc_id in run2_top_n.doc_id.tolist()]
for d_idx, doc in enumerate(run2_top_n_docs):
alt_text = run2_top_n_docs_alt[d_idx]["text"]
doc_text = combine(doc["text"], alt_text, run2_uses_doc_expansion)
if use_model_saliency:
if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked2", value=False):
col_run2.markdown(get_saliency(query_text2, doc_text),unsafe_allow_html=True)
else:
col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}2")
else:
col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}2")
else:
for d_idx, doc in enumerate(run2_top_n_docs):
if use_model_saliency:
if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked2", value=False):
col_run2.markdown(get_saliency(query_text2, doc),unsafe_allow_html=True)
else:
col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}2")
else:
col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}2")
st.divider()
else:
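# Overview mode: distributions of per-query scores and their differences across the two runs.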
st.title("Overview")
st.subheader(f"Scores of {metric_name}")
fig = create_boxplot_2df(results1, results2, metric_name)
st.plotly_chart(fig)
st.subheader(f"Score Difference of {metric_name}")
fig_comp = create_boxplot_diff(results1, results2, metric_name)
st.plotly_chart(fig_comp)
else:
st.warning("Please choose a dataset and upload a run file. If you chose \"custom\" be sure that you uploaded all files (queries, corpus, qrels)")