KB-VQA

Sleeping

App Files Files Community

KB-VQA / my_model /tabs /results.py

m7mdal7aj

Update my_model/tabs/results.py

6d94c7b verified 8 months ago

raw

history blame

2.51 kB

	import streamlit as st
	from my_model.results.demo import ResultDemonstrator
	from my_model.config import evaluation_config as config


	def run_demo() -> None:
	    """
	    Render the evaluation tab of the Streamlit app.

	    The page is laid out as two columns created with ``st.columns([1, 4])``. The first
	    column holds the selection widgets and the second column shows the output that the
	    current selections call for:

	    * "Evaluation Results & Analysis": a second radio picks the analysis type, with the
	      prompt-length option preselected (``index=2``). For that option two further radios
	      pick the model size (from ``config.MODEL_NAMES``) and the score type, and one
	      expander per entry of ``config.MODEL_CONFIGURATIONS`` is drawn with the
	      token-count-vs-score plot inside it. The other two options call the demonstrator's
	      main-results and per-question-category displays respectively.
	    * "Evaluation Samples": a button is shown; when it is pressed, three samples are
	      displayed through ``ResultDemonstrator.show_samples``.

	    Returns:
	        None
	    """

	    # Labels are compared against the radio return values, so each one is spelled once.
	    results_section = "Evaluation Results & Analysis"
	    samples_section = 'Evaluation Samples'
	    main_analysis = "Main & Ablation Results"
	    category_analysis = "Results per Question Category"
	    token_analysis = "Prompt Length (token count) Impact on Performance"

	    demonstrator = ResultDemonstrator()
	    controls_col, display_col = st.columns([1, 4])

	    # --- Selection widgets -------------------------------------------------
	    # Widget variables are only bound on the branch that creates them; the display
	    # column below re-checks the same flags before reading any of them.
	    with controls_col:
	        section_type = st.radio("Select Evaluation Aspect", [results_section, samples_section])
	        showing_results = section_type == results_section
	        showing_samples = (not showing_results) and section_type == samples_section

	        if showing_results:
	            analysis_type = st.radio(
	                "Select Type",
	                [main_analysis, category_analysis, token_analysis],
	                index=2,
	            )
	            showing_token_impact = analysis_type == token_analysis
	            if showing_token_impact:
	                # These two selectors only make sense for the prompt-length plots.
	                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
	                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
	        elif showing_samples:
	            samples_button = st.button("Generate Random Samples")

	    # --- Output ------------------------------------------------------------
	    with display_col:
	        if showing_results:
	            if showing_token_impact:
	                # One collapsible panel per model configuration.
	                for configuration in config.MODEL_CONFIGURATIONS:
	                    with st.expander(configuration):
	                        demonstrator.plot_token_count_vs_scores(configuration, model_name, score_name)
	            elif analysis_type == main_analysis:
	                demonstrator.display_main_results()
	            elif analysis_type == category_analysis:
	                demonstrator.display_ablation_results_per_question_category()
	        elif showing_samples and samples_button:
	            demonstrator.show_samples(3)