|
import streamlit as st |
|
from my_model.results.demo import ResultDemonstrator |
|
from my_model.config import evaluation_config as config |
|
|
|
|
|
def run_demo() -> None:
    """
    Render the Streamlit page used to browse model evaluation output.

    The page is split into two columns with a 1:4 width ratio. The narrow
    column holds the controls and the wide column holds whatever the controls
    select:

    * "Evaluation Results & Analysis" offers three views. The prompt-length
      view is preselected (``index=2``); it adds model-size and score-type
      selectors and draws one expander per entry in
      ``config.MODEL_CONFIGURATIONS``. The other two views delegate to the
      main-results and per-question-category displays of
      ``ResultDemonstrator``.
    * "Evaluation Samples" shows a button; when it is pressed,
      ``show_samples(3)`` is called on the demonstrator.

    Returns:
        None
    """
    # Labels that are both shown to the user and compared against the widget
    # return values; naming them once keeps the two uses in sync.
    results_section = "Evaluation Results & Analysis"
    samples_section = 'Evaluation Samples'
    main_view = "Main & Ablation Results"
    category_view = "Results per Question Category"
    prompt_length_view = "Prompt Length (token count) Impact on Performance"

    demo = ResultDemonstrator()
    controls, display = st.columns([1, 4])

    # Left column: gather every user choice before anything is drawn on the right.
    with controls:
        section_type = st.radio("Select Evaluation Aspect", [results_section, samples_section])
        showing_results = section_type == results_section
        showing_samples = section_type == samples_section

        if showing_results:
            analysis_type = st.radio(
                "Select Type",
                [main_view, category_view, prompt_length_view],
                index=2,
            )
            # Model and score selectors only apply to the prompt-length plots.
            if analysis_type == prompt_length_view:
                model_name = st.radio("Select Model Size", config.MODEL_NAMES)
                score_name = st.radio("Select Score Type", ["VQA Score", "Exact Match"])
        elif showing_samples:
            samples_button = st.button("Generate Random Samples")

    # Right column: render the view matching the choices made above. Each
    # variable read here was bound in the left column under the same condition.
    with display:
        if showing_results:
            if analysis_type == prompt_length_view:
                for configuration in config.MODEL_CONFIGURATIONS:
                    with st.expander(configuration):
                        demo.plot_token_count_vs_scores(configuration, model_name, score_name)
            elif analysis_type == main_view:
                demo.display_main_results()
            elif analysis_type == category_view:
                demo.display_ablation_results_per_question_category()
        elif showing_samples and samples_button:
            demo.show_samples(3)