Spaces:

analist
/

Travel.Com

Sleeping

App Files Files Community

analist committed on Jan 8

Commit

ee9aa01

verified ·

1 Parent(s): 00994c4

Update app.py

Browse files

Files changed (1) hide show

app.py +190 -126

app.py CHANGED Viewed

@@ -96,148 +96,212 @@ def plot_feature_importance(model, feature_names, model_type):
     plt.title(f"Feature Importance - {model_type}")
     return plt.gcf()
 def app():
     """Render the Streamlit UI: load the data, train the models once per session, then render the page chosen in the sidebar."""
-    st.title("Interpréteur de Modèles ML")
     # Load data
     X_train, y_train, X_test, y_test, feature_names = load_data()
     # Train models if not in session state
     if 'model_results' not in st.session_state:
-        with st.spinner("Entraînement des modèles en cours..."):
             st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
-    # Sidebar
-    st.sidebar.title("Navigation")
-    selected_model = st.sidebar.selectbox(
-        "Sélectionnez un modèle",
-        list(st.session_state.model_results.keys())
-    )
-    page = st.sidebar.radio(
-        "Sélectionnez une section",
-        ["Performance des modèles",
-         "Interprétation du modèle",
-         "Analyse des caractéristiques",
-         "Simulateur de prédictions"]
-    )
     # Model object stored under the entry picked in the sidebar
     current_model = st.session_state.model_results[selected_model]['model']
-    # Model performance
-    if page == "Performance des modèles":
-        st.header("Performance des modèles")
-        # Plot global performance comparison
-        st.subheader("Comparaison des performances")
-        performance_fig = plot_model_performance(st.session_state.model_results)
-        st.pyplot(performance_fig)
-        # Detailed metrics for selected model
-        st.subheader(f"Métriques détaillées - {selected_model}")
-        col1, col2 = st.columns(2)
-        with col1:
-            st.write("Métriques d'entraînement:")
-            for metric, value in st.session_state.model_results[selected_model]['train_metrics'].items():
-                st.write(f"{metric}: {value:.4f}")
-        with col2:
-            st.write("Métriques de test:")
-            for metric, value in st.session_state.model_results[selected_model]['test_metrics'].items():
-                st.write(f"{metric}: {value:.4f}")
-    # Model interpretation
-    elif page == "Interprétation du modèle":
-        st.header(f"Interprétation du modèle - {selected_model}")
-        if selected_model in ["Decision Tree", "Random Forest"]:
-            if selected_model == "Decision Tree":
-                st.subheader("Visualisation de l'arbre")
-                max_depth = st.slider("Profondeur maximale à afficher", 1, 5, 3)
-                fig, ax = plt.subplots(figsize=(20, 10))
-                plot_tree(current_model, feature_names=list(feature_names),
-                         max_depth=max_depth, filled=True, rounded=True)
-                st.pyplot(fig)
-            st.subheader("Règles de décision importantes")
-            if selected_model == "Decision Tree":
-                st.text(export_text(current_model, feature_names=list(feature_names)))
-        # SHAP values for all models
-        st.subheader("SHAP Values")
-        with st.spinner("Calcul des valeurs SHAP en cours..."):
-            # "Logistic Regression" gets a LinearExplainer; every other model name goes through TreeExplainer
-            explainer = shap.TreeExplainer(current_model) if selected_model != "Logistic Regression" \
-                       else shap.LinearExplainer(current_model, X_train)
-            shap_values = explainer.shap_values(X_train[:100])  # Using first 100 samples for speed
-            fig, ax = plt.subplots(figsize=(10, 6))
-            shap.summary_plot(shap_values, X_train[:100], feature_names=list(feature_names),
-                            show=False)
-            st.pyplot(fig)
-    # Feature analysis
-    elif page == "Analyse des caractéristiques":
-        st.header("Analyse des caractéristiques")
-        # Feature importance
-        st.subheader("Importance des caractéristiques")
-        importance_fig = plot_feature_importance(current_model, feature_names, selected_model)
-        st.pyplot(importance_fig)
-        # Feature correlation
-        st.subheader("Matrice de corrélation")
-        corr_matrix = X_train.corr()
-        fig, ax = plt.subplots(figsize=(10, 8))
-        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0)
-        st.pyplot(fig)
-    # Prediction simulator
-    else:
-        st.header("Simulateur de prédictions")
-        # One input widget per feature: selectbox for object-dtype columns, slider otherwise
-        input_values = {}
-        for feature in feature_names:
-            if X_train[feature].dtype == 'object':
-                input_values[feature] = st.selectbox(
-                    f"Sélectionnez {feature}",
-                    options=X_train[feature].unique()
-                )
-            else:
-                input_values[feature] = st.slider(
-                    f"Valeur pour {feature}",
-                    float(X_train[feature].min()),
-                    float(X_train[feature].max()),
-                    float(X_train[feature].mean())
-                )
-        if st.button("Prédire"):
-            input_df = pd.DataFrame([input_values])
-            prediction = current_model.predict_proba(input_df)
-            st.write("Probabilités prédites:")
-            st.write({f"Classe {i}": f"{prob:.2%}" for i, prob in enumerate(prediction[0])})
-            if selected_model == "Decision Tree":
-                st.subheader("Chemin de décision")
-                node_indicator = current_model.decision_path(input_df)
-                leaf_id = current_model.apply(input_df)
-                # Node ids recorded for the first (and only) input row
-                node_index = node_indicator.indices[node_indicator.indptr[0]:node_indicator.indptr[1]]
-                rules = []
-                for node_id in node_index:
-                    # The leaf reached by the sample is skipped; only the other nodes yield a rule
-                    if node_id != leaf_id[0]:
-                        threshold = current_model.tree_.threshold[node_id]
-                        feature = feature_names[current_model.tree_.feature[node_id]]
-                        if input_df.iloc[0][feature] <= threshold:
-                            rules.append(f"{feature} ≤ {threshold:.2f}")
-                        else:
-                            rules.append(f"{feature} > {threshold:.2f}")
-                for rule in rules:
-                    st.write(rule)
 # Launch the app only when this file is run as the main module
 if __name__ == "__main__":
     app()

     plt.title(f"Feature Importance - {model_type}")
     return plt.gcf()
+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.tree import plot_tree, export_text
+import seaborn as sns
+from sklearn.preprocessing import LabelEncoder
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
+import shap
+# Page and theme configuration
+st.set_page_config(
+    page_title="ML Model Interpreter",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS
+# Injected as raw HTML (unsafe_allow_html=True) so the <style> rules reach the rendered page.
+st.markdown("""
+<style>
+    /* Couleurs principales */
+    :root {
+        --primary-blue: #1E88E5;
+        --light-blue: #90CAF9;
+        --dark-blue: #0D47A1;
+        --white: #FFFFFF;
+    }
+    /* En-tête principal */
+    .main-header {
+        color: var(--dark-blue);
+        text-align: center;
+        padding: 1rem;
+        background: linear-gradient(90deg, var(--white) 0%, var(--light-blue) 50%, var(--white) 100%);
+        border-radius: 10px;
+        margin-bottom: 2rem;
+    }
+    /* Carte pour les métriques */
+    .metric-card {
+        background-color: white;
+        padding: 1.5rem;
+        border-radius: 10px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        margin-bottom: 1rem;
+    }
+    /* Style pour les sous-titres */
+    .sub-header {
+        color: var(--primary-blue);
+        border-bottom: 2px solid var(--light-blue);
+        padding-bottom: 0.5rem;
+        margin-bottom: 1rem;
+    }
+    /* Style pour les valeurs de métriques */
+    .metric-value {
+        font-size: 1.5rem;
+        font-weight: bold;
+        color: var(--primary-blue);
+    }
+    /* Style pour la barre latérale */
+    .sidebar .sidebar-content {
+        background-color: var(--white);
+    }
+    /* Style pour les boutons */
+    .stButton > button {
+        background-color: var(--primary-blue);
+        color: white;
+        border-radius: 5px;
+        border: none;
+        padding: 0.5rem 1rem;
+    }
+    /* Style pour les sliders */
+    .stSlider > div > div {
+        background-color: var(--light-blue);
+    }
+    /* Style pour les selectbox */
+    .stSelectbox > div > div {
+        background-color: white;
+        border: 1px solid var(--light-blue);
+    }
+</style>
+""", unsafe_allow_html=True)
+def custom_metric_card(title, value, prefix=""):
+    """Build the HTML snippet for one metric card.
+
+    Args:
+        title: Heading text placed in the card's <h3>.
+        value: Number rendered with four decimal places.
+        prefix: Optional text placed immediately before the number.
+
+    Returns:
+        An HTML string using the "metric-card" and "metric-value" CSS classes.
+    """
+    shown = f"{prefix}{value:.4f}"
+    return f"""
+    <div class="metric-card">
+        <h3 style="color: #1E88E5; margin-bottom: 0.5rem;">{title}</h3>
+        <p class="metric-value">{shown}</p>
+    </div>
+    """
+def plot_with_style(fig):
+    """Apply the app's light visual theme to an existing Matplotlib figure.
+
+    Args:
+        fig: Figure whose background and axes are restyled in place.
+
+    Returns:
+        The same figure object that was passed in.
+    """
+    # Matplotlib style
+    # Matplotlib 3.6 renamed the bundled seaborn sheets to "seaborn-v0_8*" and
+    # later dropped the old names, so a bare 'seaborn' raises on current
+    # releases. Use whichever name this installation actually provides.
+    for style_name in ('seaborn', 'seaborn-v0_8'):
+        if style_name in plt.style.available:
+            plt.style.use(style_name)
+            break
+    fig.patch.set_facecolor('#FFFFFF')
+    for ax in fig.axes:
+        ax.set_facecolor('#F8F9FA')
+        ax.grid(True, linestyle='--', alpha=0.7)
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+    return fig
+# [The load_data and train_models functions are unchanged]
+def plot_model_performance(results):
+    """Draw side-by-side bar charts of training and test metrics per model.
+
+    Args:
+        results: Mapping of model name to a dict holding 'train_metrics' and
+            'test_metrics'; each of those is indexed by 'accuracy', 'f1',
+            'precision', 'recall' and 'roc_auc'.
+
+    Returns:
+        The Matplotlib figure containing both charts.
+    """
+    metrics = ['accuracy', 'f1', 'precision', 'recall', 'roc_auc']
+    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
+    # Style configuration
+    # Matplotlib 3.6 renamed the bundled seaborn sheets to "seaborn-v0_8*" and
+    # later dropped the old names, so a bare 'seaborn' raises on current
+    # releases. Use whichever name this installation actually provides.
+    for style_name in ('seaborn', 'seaborn-v0_8'):
+        if style_name in plt.style.available:
+            plt.style.use(style_name)
+            break
+    colors = ['#1E88E5', '#90CAF9', '#0D47A1', '#42A5F5']
+    # Left panel: training metrics; right panel: test metrics
+    panels = (
+        ('train_metrics', 'Performance d\'Entraînement'),
+        ('test_metrics', 'Performance de Test'),
+    )
+    for ax, (split_key, title) in zip(axes, panels):
+        # Rows are metrics, columns are models
+        frame = pd.DataFrame(
+            {name: [scores[split_key][metric] for metric in metrics]
+             for name, scores in results.items()},
+            index=metrics,
+        )
+        frame.plot(kind='bar', ax=ax, title=title, color=colors)
+        ax.set_ylim(0, 1)
+    # Chart styling
+    for ax in axes:
+        ax.set_facecolor('#F8F9FA')
+        ax.grid(True, linestyle='--', alpha=0.7)
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
+    plt.tight_layout()
+    return fig
 def app():
     """Render the Streamlit UI: load the data, train the models once per session, then render the page chosen in the sidebar."""
+    # Main header with custom styling
+    st.markdown('<h1 class="main-header">Interpréteur de Modèles ML</h1>', unsafe_allow_html=True)
     # Load data
     X_train, y_train, X_test, y_test, feature_names = load_data()
     # Train models if not in session state
     if 'model_results' not in st.session_state:
+        with st.spinner("🔄 Entraînement des modèles en cours..."):
             st.session_state.model_results = train_models(X_train, y_train, X_test, y_test)
+    # Sidebar with custom styling
+    with st.sidebar:
+        st.markdown('<h2 style="color: #1E88E5;">Navigation</h2>', unsafe_allow_html=True)
+        selected_model = st.selectbox(
+            "📊 Sélectionnez un modèle",
+            list(st.session_state.model_results.keys())
+        )
+        st.markdown('<hr style="margin: 1rem 0;">', unsafe_allow_html=True)
+        page = st.radio(
+            "📑 Sélectionnez une section",
+            ["Performance des modèles",
+             "Interprétation du modèle",
+             "Analyse des caractéristiques",
+             "Simulateur de prédictions"]
+        )
     # Model object stored under the entry picked in the sidebar
     current_model = st.session_state.model_results[selected_model]['model']
+    # Main content container
+    main_container = st.container()
+    with main_container:
+        if page == "Performance des modèles":
+            st.markdown('<h2 class="sub-header">Performance des modèles</h2>', unsafe_allow_html=True)
+            # Performance charts
+            performance_fig = plot_model_performance(st.session_state.model_results)
+            st.pyplot(plot_with_style(performance_fig))
+            # Detailed metrics rendered as cards
+            st.markdown('<h3 class="sub-header">Métriques détaillées</h3>', unsafe_allow_html=True)
+            col1, col2 = st.columns(2)
+            with col1:
+                st.markdown('<h4 style="color: #1E88E5;">Entraînement</h4>', unsafe_allow_html=True)
+                for metric, value in st.session_state.model_results[selected_model]['train_metrics'].items():
+                    st.markdown(custom_metric_card(metric.capitalize(), value), unsafe_allow_html=True)
+            with col2:
+                st.markdown('<h4 style="color: #1E88E5;">Test</h4>', unsafe_allow_html=True)
+                for metric, value in st.session_state.model_results[selected_model]['test_metrics'].items():
+                    st.markdown(custom_metric_card(metric.capitalize(), value), unsafe_allow_html=True)
+        # [Remaining sections, with adapted styling...]
+        # NOTE(review): only the "Performance des modèles" page has a branch here; as shown,
+        # the other three radio options render nothing. Confirm the remaining sections are restored.
 # Launch the app only when this file is run as the main module
 if __name__ == "__main__":
     app()