import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config


def run_demo() -> None:
    """
    Render the interactive Streamlit page for exploring model evaluation output.

    The page is laid out in two columns created with ``st.columns([1, 4])``. The first column
    holds the controls: a radio button choosing between "Evaluation Results & Analysis" and
    "Evaluation Samples", followed by the controls that belong to that choice (an analysis type
    selector, plus model size and score type selectors for the prompt length analysis; or a
    "Generate Random Samples" button). The second column displays what the controls ask for by
    delegating to a ResultDemonstrator instance.

    Returns:
        None
    """

    # Each label is both the text shown in the widget and the value the widget hands back,
    # so it is written once here and reused for building the widget and for branching on it.
    results_section = "Evaluation Results & Analysis"
    samples_section = "Evaluation Samples"
    main_analysis = "Main & Ablation Results"
    category_analysis = "Results per Question Category"
    prompt_length_analysis = "Prompt Length (token count) Impact on Performance"

    demonstrator = ResultDemonstrator()
    controls_column, display_column = st.columns([1, 4])

    # Selections that only exist for some choices start out empty; they are filled in below
    # solely by the widgets that are actually rendered on this run.
    chosen_analysis = None
    chosen_model = None
    chosen_score = None
    samples_requested = False

    with controls_column:
        chosen_section = st.radio("Select Evaluation Aspect", [results_section, samples_section])

        if chosen_section == samples_section:
            samples_requested = st.button("Generate Random Samples")
        elif chosen_section == results_section:
            # index=2 preselects the third option, i.e. the prompt length analysis.
            chosen_analysis = st.radio(
                "Select Type",
                [main_analysis, category_analysis, prompt_length_analysis],
                index=2,
            )
            # Model size and score type are only asked for when the prompt length analysis is chosen.
            if chosen_analysis == prompt_length_analysis:
                chosen_model = st.radio("Select Model Size", config.MODEL_NAMES)
                chosen_score = st.radio("Select Score Type", ["VQA Score", "Exact Match"])

    with display_column:
        # samples_requested can only be truthy in the samples section and chosen_analysis can only
        # be set in the results section, so a single flat chain covers both sections.
        if samples_requested:
            # NOTE(review): 3 is presumably the number of samples to show - confirm against
            # ResultDemonstrator.show_samples.
            demonstrator.show_samples(3)
        elif chosen_analysis == main_analysis:
            demonstrator.display_main_results()
        elif chosen_analysis == category_analysis:
            demonstrator.display_ablation_results_per_question_category()
        elif chosen_analysis == prompt_length_analysis:
            # One expander per model configuration, each holding that configuration's plot.
            for configuration in config.MODEL_CONFIGURATIONS:
                with st.expander(configuration):
                    demonstrator.plot_token_count_vs_scores(configuration, chosen_model, chosen_score)