import streamlit as st
from my_model.results.demo import ResultDemonstrator
from my_model.config import evaluation_config as config


def run_demo():
    """
    Render the Streamlit page used to browse evaluation results and samples.

    The page is laid out as two columns created with ``st.columns([1, 4])``:
    the first holds the selection widgets, the second shows whatever the
    selections ask for by calling the matching ``ResultDemonstrator`` method.
    """
    # Option labels, bound once so the widgets and the dispatch below agree.
    results_section = "Evaluation Results & Analysis"
    samples_section = "Evaluation Samples"
    main_results = "Main & Ablation Results"
    per_category = "Results per Question Category"
    token_impact = "Prompt Length (token count) Impact on Performance"

    demonstrator = ResultDemonstrator()
    controls, display = st.columns([1, 4])

    # Widget state; each value is only filled in when its widget is rendered.
    analysis_type = None
    model_name = None
    score_name = None
    samples_requested = False

    with controls:
        section_type = st.radio(
            "Select Evaluation Aspect",
            [results_section, samples_section],
        )

        if section_type == results_section:
            # The analysis selector is shown only for the results section;
            # index=2 preselects the token-count analysis.
            analysis_type = st.radio(
                "Select Type",
                [main_results, per_category, token_impact],
                index=2,
            )
            if analysis_type == token_impact:
                # Extra selectors that only the token-count plots need.
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio(
                    "Select Score Type",
                    ["VQA Score", "Exact Match"],
                )
        elif section_type == samples_section:
            samples_requested = st.button("Generate Random Samples")

    with display:
        if section_type == samples_section:
            if samples_requested:
                demonstrator.show_samples(3)
        elif section_type == results_section:
            if analysis_type == main_results:
                demonstrator.display_main_results()
            elif analysis_type == per_category:
                demonstrator.display_ablation_results_per_question_category()
            elif analysis_type == token_impact:
                # One collapsible panel per model configuration.
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demonstrator.plot_token_count_vs_scores(
                            configuration, model_name, score_name
                        )