import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Set page config
st.set_page_config(page_title="ML Models Comparison Dashboard", layout="wide")

# Title and description
st.title("Machine Learning Models Comparison Dashboard")
st.write("Compare performance metrics of different ML models on the CIFAR-10 dataset")

# Pre-computed metrics
results = {
    'Accuracy': {
        'KNN': 0.331, 'Logistic Regression': 0.368, 'Random Forest': 0.466,
        'Naive Bayes': 0.298, 'K-Means': 0.109, 'CNN': 0.694
    },
    'Precision': {
        'KNN': 0.342, 'Logistic Regression': 0.387, 'Random Forest': 0.409,
        'Naive Bayes': 0.295, 'K-Means': 0.271, 'CNN': 0.453
    },
    'Recall': {
        'KNN': 0.345, 'Logistic Regression': 0.389, 'Random Forest': 0.412,
        'Naive Bayes': 0.298, 'K-Means': 0.275, 'CNN': 0.456
    },
    'F1': {
        'KNN': 0.343, 'Logistic Regression': 0.388, 'Random Forest': 0.410,
        'Naive Bayes': 0.296, 'K-Means': 0.273, 'CNN': 0.454
    }
}

# Confusion matrices data (rows = true classes, columns = predicted classes)
confusion_matrices = {
    'CNN': np.array([
        [672, 27, 78, 21, 21, 3, 3, 13, 116, 46],
        [20, 807, 3, 15, 8, 2, 7, 1, 22, 115],
        [54, 4, 593, 82, 144, 36, 28, 32, 18, 9],
        [17, 8, 73, 586, 100, 108, 38, 34, 12, 24],
        [19, 0, 53, 69, 720, 14, 15, 90, 15, 5],
        [5, 3, 89, 300, 58, 458, 6, 64, 9, 8],
        [3, 9, 55, 122, 118, 13, 653, 6, 7, 14],
        [17, 3, 30, 74, 70, 36, 0, 754, 1, 15],
        [41, 24, 11, 20, 9, 3, 6, 8, 844, 34],
        [20, 51, 4, 22, 9, 3, 6, 12, 25, 848]
    ]),
    'K-Means': np.array([
        [106, 109, 62, 41, 139, 81, 33, 185, 211, 33],
        [97, 184, 92, 141, 96, 159, 106, 25, 42, 58],
        [54, 69, 252, 160, 42, 46, 120, 109, 84, 64],
        [83, 120, 146, 154, 18, 60, 121, 87, 61, 150],
        [39, 49, 245, 219, 19, 51, 117, 73, 20, 168],
        [72, 185, 163, 103, 30, 35, 88, 132, 38, 154],
        [86, 94, 211, 212, 11, 28, 206, 30, 39, 83],
        [111, 88, 205, 131, 54, 135, 58, 57, 22, 139],
        [31, 167, 46, 28, 328, 195, 33, 91, 39, 42],
        [131, 81, 83, 123, 141, 331, 19, 18, 37, 36]
    ]),
    'KNN': np.array([
        [565, 12, 107, 20, 52, 6, 25, 4, 205, 4],
        [195, 244, 121, 61, 120, 28, 31, 4, 178, 18],
        [150, 7, 463, 58, 201, 24, 48, 10, 39, 0],
        [108, 10, 279, 243, 133, 92, 80, 17, 32, 6],
        [100, 6, 282, 54, 443, 23, 38, 11, 43, 0],
        [89, 4, 254, 178, 143, 208, 68, 10, 41, 5],
        [49, 1, 317, 102, 260, 24, 227, 2, 18, 0],
        [127, 16, 226, 76, 236, 40, 45, 192, 41, 1],
        [181, 31, 56, 45, 52, 11, 8, 4, 607, 5],
        [192, 81, 127, 80, 117, 27, 41, 14, 205, 116]
    ]),
    'Logistic Regression': np.array([
        [424, 51, 58, 50, 25, 41, 17, 56, 202, 76],
        [72, 426, 35, 48, 26, 36, 48, 47, 78, 184],
        [96, 34, 266, 90, 123, 94, 130, 84, 51, 32],
        [43, 52, 115, 235, 72, 194, 131, 56, 43, 59],
        [55, 35, 137, 80, 280, 97, 152, 112, 25, 27],
        [41, 41, 103, 202, 90, 300, 77, 64, 45, 37],
        [14, 47, 97, 147, 94, 94, 417, 42, 23, 25],
        [47, 47, 91, 70, 97, 91, 47, 397, 45, 68],
        [146, 85, 31, 37, 17, 41, 10, 18, 513, 102],
        [78, 180, 26, 36, 30, 29, 45, 59, 98, 419]
    ]),
    'Naive Bayes': np.array([
        [494, 20, 39, 10, 84, 34, 50, 9, 200, 60],
        [141, 166, 24, 31, 66, 72, 192, 19, 121, 168],
        [225, 24, 83, 15, 292, 48, 209, 21, 54, 29],
        [163, 36, 54, 76, 151, 129, 262, 26, 34, 69],
        [86, 8, 57, 26, 417, 38, 265, 22, 50, 31],
        [156, 17, 55, 51, 167, 264, 159, 36, 57, 38],
        [106, 2, 60, 18, 228, 46, 467, 15, 19, 39],
        [134, 24, 36, 41, 228, 94, 102, 131, 72, 138],
        [168, 41, 18, 17, 56, 83, 39, 8, 471, 99],
        [144, 67, 17, 20, 48, 32, 101, 23, 141, 407]
    ]),
    'Random Forest': np.array([
        [559, 36, 62, 21, 28, 20, 20, 30, 165, 59],
        [29, 544, 10, 38, 22, 34, 45, 35, 67, 176],
        [100, 36, 337, 79, 137, 69, 123, 54, 34, 31],
        [53, 46, 76, 282, 88, 173, 132, 62, 23, 65],
        [54, 21, 139, 60, 381, 47, 158, 91, 27, 22],
        [32, 27, 89, 167, 74, 400, 77, 74, 27, 33],
        [11, 33, 87, 78, 99, 53, 567, 31, 6, 35],
        [47, 44, 53, 58, 104, 83, 38, 451, 24, 98],
        [90, 83, 19, 33, 15, 39, 7, 22, 615, 77],
        [50, 176, 18, 38, 20, 24, 27, 43, 80, 524]
    ])
}

# Create tabs for different visualizations
tab1, tab2, tab3 = st.tabs(["Overall Performance", "Confusion Matrices", "Individual Metrics"])

with tab1:
    st.header("Overall Model Performance")

    # Create bar plot for overall accuracy
    fig, ax = plt.subplots(figsize=(12, 6))
    models = list(results['Accuracy'].keys())
    accuracies = list(results['Accuracy'].values())
    colors = ['purple', 'navy', 'teal', 'green', 'lime', 'yellow']
    bars = ax.bar(models, accuracies, color=colors)

    # Customize the plot
    ax.set_title('Overall Model Performance Comparison')
    ax.set_xlabel('Models')
    ax.set_ylabel('Accuracy')
    plt.xticks(rotation=45)

    # Add value labels on top of bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{height:.3f}', ha='center', va='bottom')

    plt.tight_layout()
    st.pyplot(fig)

with tab2:
    st.header("Confusion Matrices")

    # Model selection for confusion matrix
    selected_model = st.selectbox("Select Model", list(confusion_matrices.keys()))

    # Create confusion matrix plot using seaborn
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(confusion_matrices[selected_model], annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'Confusion Matrix - {selected_model}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    plt.tight_layout()
    st.pyplot(fig)

with tab3:
    st.header("Individual Metrics")
    col1, col2, col3 = st.columns(3)
    metrics = ['Precision', 'Recall', 'F1']

    for metric, col in zip(metrics, [col1, col2, col3]):
        with col:
            fig, ax = plt.subplots(figsize=(8, 6))
            models = list(results[metric].keys())
            values = list(results[metric].values())
            ax.bar(models, values)
            ax.set_title(f'Comparison of {metric}')
            ax.set_ylabel(metric)
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            st.pyplot(fig)

# Add metrics table to sidebar
st.sidebar.header("Metrics Table")
df_metrics = pd.DataFrame(results)
st.sidebar.dataframe(df_metrics.style.format("{:.3f}"))

# Add download button (the DataFrame is passed as an argument so the cache
# is keyed on its contents rather than on nothing)
@st.cache_data
def convert_df_to_csv(df):
    return df.to_csv()

csv = convert_df_to_csv(df_metrics)
st.sidebar.download_button(
    label="Download metrics as CSV",
    data=csv,
    file_name='model_metrics.csv',
    mime='text/csv',
)

# Footer
st.markdown("---")
st.markdown("Dashboard created for ML Models Comparison on the CIFAR-10 dataset")
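
# Optional sanity check (a minimal sketch, not part of the original dashboard):
# because each confusion matrix has true classes on the rows and predictions on
# the columns, overall accuracy can be recomputed as trace(cm) / cm.sum() and
# compared against the hard-coded 'Accuracy' values above. The expander label
# is an arbitrary choice.
with st.sidebar.expander("Accuracy sanity check"):
    for name, cm in confusion_matrices.items():
        recomputed = np.trace(cm) / cm.sum()
        st.write(f"{name}: {recomputed:.3f} (reported: {results['Accuracy'][name]:.3f})")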