# NOTE: page-scrape residue, not part of the program ("Spaces: Sleeping").
from io import BytesIO  # In-memory buffer for the uploaded CSV bytes

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

try:
    import scipy.stats  # noqa: F401  (optional: only needed for Spearman correlation)
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
# Sposta l'avviso della libreria scipy dopo il caricamento del file, | |
# così non appare se non viene caricato nessun file. | |
# st.sidebar.warning("Libreria 'scipy' non trovata...") # Rimosso da qui | |
# --- Configuration ---
# Wide layout plus the browser-tab title for the whole app.
st.set_page_config(layout="wide", page_title="Dashboard Analisi Clima")
# --- Constants & Helper Functions ---
# Score ranges and the sentiment label attached to each of them.
SCORE_BUCKETS = {
    (0, 2.5): "Critico",
    (2.5, 4.5): "Neutrale",
    (4.5, 7): "Positivo",  # Scale assumed to go up to 6; 7 covers everything above 4.5
}
# Plot colour used for each sentiment label.
BUCKET_COLORS = {"Critico": "#d62728", "Neutrale": "#ff7f0e", "Positivo": "#2ca02c"}
THRESHOLD_LOW = 3.0  # Slightly adjusted for the bullet chart
THRESHOLD_HIGH = 4.5  # Slightly adjusted for the bullet chart
PLOTLY_TEMPLATE = "plotly_white"  # Alternatives tried: "seaborn", "plotly_dark", "ggplot2"
def categorize_score(score):
    """Return the sentiment label for a single numeric survey score.

    Args:
        score: A number, or a missing value (``None`` / ``NaN``).

    Returns:
        ``"Non Risposto"`` for missing values, ``"Critico"`` for 0..2.5,
        ``"Neutrale"`` for values above 2.5 up to 4.5, ``"Positivo"`` for
        values above 4.5 up to 7, and ``"Sconosciuto"`` for anything outside
        the 0..7 range.
    """
    if pd.isna(score):
        return "Non Risposto"
    # Upper bounds are inclusive so that exactly 2.5 / 4.5 fall in the lower bucket.
    if 0 <= score <= 2.5:
        return "Critico"
    if 2.5 < score <= 4.5:
        return "Neutrale"
    if 4.5 < score <= 7:  # Maximum score is assumed to be around 6
        return "Positivo"
    return "Sconosciuto"  # Not expected for numeric data within the survey scale
# Modifica la funzione per accettare l'oggetto file caricato invece del percorso | |
def _read_semicolon_csv(raw_bytes):
    """Parse semicolon-delimited CSV bytes, trying UTF-8 first and Latin-1 second."""
    try:
        return pd.read_csv(BytesIO(raw_bytes), delimiter=';', encoding='utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a character, so no further decoding
        # fallback is needed after this one.
        return pd.read_csv(BytesIO(raw_bytes), delimiter=';', encoding='latin-1')


def _clean_header(header_row, source_columns):
    """Turn the question-text row into non-empty, stripped column names."""
    cleaned = []
    for position, (cell, source_name) in enumerate(zip(header_row, source_columns)):
        text = str(cell).strip() if pd.notna(cell) else ""
        if not text:
            # Fall back to the CSV's own column name when it is meaningful.
            fallback = str(source_name).strip()
            if fallback and not fallback.startswith('Unnamed:'):
                text = fallback
            else:
                text = f"Colonna_Sconosciuta_{position}"
        cleaned.append(text)
    return cleaned


def _make_unique(names):
    """Append ``_1``, ``_2``, ... to repeated names so every result is distinct."""
    used = set()
    counts = {}
    unique_names = []
    for name in names:
        candidate = name
        # Loop (rather than a single suffix) so a generated name can never
        # collide with a header that already looks like "name_1".
        while candidate in used:
            counts[name] = counts.get(name, 0) + 1
            candidate = f"{name}_{counts[name]}"
        used.add(candidate)
        unique_names.append(candidate)
    return unique_names


def _infer_category(source_column_name):
    """Derive a category label from a first-row CSV column name.

    pandas renames repeated first-row labels to ``label.1``, ``label.2``, ...,
    so the text before the first dot is used as the category.
    """
    name = str(source_column_name).strip()
    looks_like_question = name.isdigit() or 'domanda' in name.lower()
    if pd.notna(source_column_name) and not looks_like_question:
        base_name = name.split('.')[0].strip()
        if base_name:
            return base_name
    if '.' in name:
        base_name = name.split('.')[0].strip()
        if name.split('.')[-1].isdigit() and base_name:
            return base_name
    elif name and not looks_like_question:
        return name
    return "Categoria Sconosciuta"


def load_and_prepare_data(uploaded_file_object):
    """Read the uploaded survey CSV and prepare it for the dashboard.

    The CSV is expected to be semicolon-delimited with two header lines: the
    first carries the category of each column, the second the question text.
    The first three columns are treated as demographics and renamed to
    ``Genere``, ``Fascia_Eta`` and ``Sede``.

    Args:
        uploaded_file_object: Object exposing ``getvalue()`` that returns the
            CSV bytes, or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple ``(df, demographic_cols, question_cols,
        question_to_category_map, numeric_question_cols, response_scale,
        overall_satisfaction_question)``. All seven items are ``None`` when
        no file was given, the CSV cannot be read, or it has no rows after
        the first header line.
    """
    failure = (None, None, None, None, None, None, None)
    if uploaded_file_object is None:
        return failure

    try:
        df_orig = _read_semicolon_csv(uploaded_file_object.getvalue())
    except Exception as e:
        st.error(f"Errore durante la lettura del CSV caricato: {e}")
        return failure

    # Columns pandas had to name itself have no category label; drop them.
    unnamed_cols = [col for col in df_orig.columns if str(col).startswith('Unnamed:')]
    df = df_orig.drop(columns=unnamed_cols)
    source_columns = df.columns.tolist()

    if df.empty:
        # Without at least one row there is no question-text line to promote.
        st.error("Il file CSV non contiene righe di dati dopo l'intestazione.")
        return failure

    # The first data row holds the real question texts: promote it to header.
    header_row = df.iloc[0].tolist()
    df = df.iloc[1:].reset_index(drop=True)
    cleaned_header = _clean_header(header_row, source_columns)
    final_header = _make_unique(cleaned_header)
    df.columns = final_header

    # --- Category Mapping (keyed by the final unique column name) ---
    demographic_count = min(3, len(final_header))
    question_to_category_map = {}
    for position, (final_name, source_name) in enumerate(zip(final_header, source_columns)):
        if position < demographic_count:
            continue
        category = _infer_category(source_name)
        question_to_category_map[final_name] = category.replace("Parità di genere", "Parità Genere")

    # --- Demographic Columns ---
    demographic_map = dict(zip(final_header[:3], ('Genere', 'Fascia_Eta', 'Sede')))
    df = df.rename(columns=demographic_map)
    demographic_cols = list(demographic_map.values())

    # Filter out spreadsheet summary rows (mean, median, totals, ...).
    if 'Sede' in df.columns:
        anomalous_sede = ['Media', 'Mediana', 'Media sezione', 'Totale', 'Scarto quadratico medio']
        summary_labels = {label.lower() for label in anomalous_sede}
        sede_normalized = df['Sede'].astype(str).str.strip().str.lower()
        # Copy so the column assignments below act on an independent frame.
        df = df[~sede_normalized.isin(summary_labels)].copy()

    # Fill missing demographic data.
    for col in demographic_cols:
        if col in df.columns:
            df[col] = (
                df[col]
                .fillna('Non specificato')
                .astype(str)
                .replace(['nan', 'None', ''], 'Non specificato')
            )

    question_cols = [col for col in question_to_category_map if col in df.columns]

    # --- Type Conversion ---
    for col in question_cols:
        answers = df[col]
        if not pd.api.types.is_numeric_dtype(answers):
            # Decimal commas become dots; anything still unparsable
            # ('N/A', '-', '', ...) is coerced to NaN below.
            answers = answers.astype(str).str.strip().str.replace(',', '.', regex=False)
        df[col] = pd.to_numeric(answers, errors='coerce')
    numeric_question_cols = df[question_cols].select_dtypes(include=np.number).columns.tolist()

    # Determine the response scale from the data, falling back to 1..6.
    response_scale = (1, 6)
    if numeric_question_cols:
        answered = df[numeric_question_cols].dropna(how='all')
        if not answered.empty:
            min_val = answered.min(skipna=True).min(skipna=True)
            max_val = answered.max(skipna=True).max(skipna=True)
            if pd.notna(min_val) and pd.notna(max_val):
                response_scale = (min_val, max_val)

    # --- Identify Overall Satisfaction Question ---
    overall_satisfaction_question = None
    possible_satisfaction_cats = ['Riepilogo', 'Generale', 'Soddisfazione Complessiva']
    possible_satisfaction_cols = [
        q for q in numeric_question_cols
        if question_to_category_map.get(q) in possible_satisfaction_cats
    ]
    if possible_satisfaction_cols:
        overall_satisfaction_question = possible_satisfaction_cols[0]
    else:
        keywords = ['soddisfazione', 'complessivamente', 'generale', 'valutazione']
        # Question text before any de-duplication suffix was appended.
        base_names = dict(zip(final_header, cleaned_header))
        for q in numeric_question_cols:
            original_cleaned_name = base_names.get(q, q)
            q_check = original_cleaned_name.lower()
            if any(keyword in q_check for keyword in keywords):
                overall_satisfaction_question = q
                st.info(f"Domanda soddisfazione generale identificata: '{q}' (basata su '{original_cleaned_name}')")
                break

    if not overall_satisfaction_question and numeric_question_cols:
        st.warning("Impossibile identificare automaticamente la domanda sulla soddisfazione generale. Alcune analisi potrebbero essere limitate.")

    return (
        df,
        demographic_cols,
        question_cols,
        question_to_category_map,
        numeric_question_cols,
        response_scale,
        overall_satisfaction_question,
    )
# --- Inizio Script Principale --- | |
# Sidebar widget used to upload the survey CSV.
st.sidebar.title('Sondaggio')
uploaded_file = st.sidebar.file_uploader("Carica il tuo file CSV", type="csv")
st.sidebar.divider()
# Procedi solo se un file è stato caricato | |
if uploaded_file is not None: | |
# The scipy hint is issued here so it only appears once a file has been uploaded.
if not SCIPY_AVAILABLE:
    st.sidebar.warning("Libreria 'scipy' non trovata. La correlazione Spearman non sarà disponibile. Installa con: pip install scipy")
# --- Load Data ---
# Only the loader call sits inside the try, so the broad handler reports
# unexpected failures and never intercepts the deliberate st.stop() below.
try:
    (
        df_full,
        demographic_cols,
        question_cols,
        question_to_category_map,
        numeric_question_cols,
        response_scale,
        overall_satisfaction_question,
    ) = load_and_prepare_data(uploaded_file)
except Exception as e:
    st.error(f"Errore critico durante l'inizializzazione dei dati dal file caricato: {e}")
    st.exception(e)  # Full traceback for debugging
    st.stop()  # Nothing below can run without data

if df_full is None:
    st.error("Caricamento o preparazione dati fallito. Controlla il file CSV.")
    st.stop()
elif df_full.empty:
    # Execution continues: the sections below show their own "no data" notices.
    st.warning("Il file CSV caricato risulta vuoto dopo la pulizia iniziale.")
# --- DA QUI IN POI, IL CODICE DEL DASHBOARD RIMANE INVARIATO --- | |
# --- MA VIENE ESEGUITO SOLO SE uploaded_file IS NOT None --- | |
# --- App Title ---
st.title("🚀 Dashboard Analisi Clima")

# ==============================================================================
# --- Sidebar: demographic filters ---
# ==============================================================================
st.sidebar.title("Filtri & Controlli")
st.sidebar.subheader("👤 Filtri Demografici")
selected_filters = {}
if demographic_cols:
    # Options come from the full data set so every possible value is offered.
    for demo_col in demographic_cols:
        if demo_col not in df_full.columns:
            st.sidebar.warning(f"Colonna demografica '{demo_col}' definita ma non trovata nel DataFrame.")
            continue
        unique_values = sorted(df_full[demo_col].astype(str).unique())
        if len(unique_values) > 1:
            selected_filters[demo_col] = st.sidebar.multiselect(
                f"{demo_col}",
                options=unique_values,
                default=unique_values,
            )
        else:
            # A single value needs no widget; keep it so the filter loop stays uniform.
            selected_filters[demo_col] = unique_values

    # Re-apply every filter from the full data on each rerun.
    df_filtered = df_full.copy()
    for col, selected_values in selected_filters.items():
        # An empty selection is treated as "no filter" for that column.
        if col in df_filtered.columns and selected_values:
            # The options were built from astype(str), so compare as strings
            # whatever the column dtype is.
            wanted = [str(value) for value in selected_values]
            df_filtered = df_filtered[df_filtered[col].astype(str).isin(wanted)]
else:
    st.sidebar.warning("Nessuna colonna demografica valida trovata per i filtri.")
    df_filtered = df_full.copy() if df_full is not None else pd.DataFrame()
st.sidebar.divider()
st.sidebar.subheader("📊 Metriche Chiave (Filtrate)")

# Respondent count after the demographic filters.
total_respondents_filtered = len(df_filtered) if df_filtered is not None else 0
st.sidebar.metric("Rispondenti Filtrati", total_respondents_filtered)

# Defaults used by the tabs when the metrics cannot be computed.
avg_overall_filtered = np.nan
avg_scores_per_category_f = pd.Series(dtype=float)
driver_df = pd.DataFrame()

# Spearman is preferred when scipy is installed, Pearson otherwise.
corr_method_sidebar = 'spearman' if SCIPY_AVAILABLE else 'pearson'
if df_filtered is not None and not df_filtered.empty and numeric_question_cols:
    # Both the average metric and the driver analysis need this same check.
    overall_is_usable = bool(
        overall_satisfaction_question
        and overall_satisfaction_question in df_filtered.columns
        and pd.api.types.is_numeric_dtype(df_filtered[overall_satisfaction_question])
    )

    # --- Overall satisfaction average ---
    if overall_is_usable:
        overall_sat_data = df_filtered[overall_satisfaction_question].dropna()
        if not overall_sat_data.empty:
            avg_overall_filtered = overall_sat_data.mean()
            midpoint = (response_scale[0] + response_scale[1]) / 2 if response_scale else 3.5  # Fallback midpoint
            delta_vs_mid = avg_overall_filtered - midpoint
            st.sidebar.metric(
                "Soddisfazione Generale Media",
                f"{avg_overall_filtered:.2f}",
                f"{delta_vs_mid:+.2f} vs Midpoint ({midpoint:.1f})",
            )
        else:
            st.sidebar.metric("Soddisfazione Generale Media", "N/D (no data)")
    else:
        st.sidebar.metric("Soddisfazione Generale Media", "N/D (Domanda non trovata/valida)")

    # --- Category averages on the filtered data ---
    numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns]
    if numeric_cols_in_filtered:
        avg_scores_per_question_f = df_filtered[numeric_cols_in_filtered].mean(axis=0, skipna=True)
        df_avg_scores_f = pd.DataFrame({
            'Domanda': avg_scores_per_question_f.index,
            'Punteggio Medio': avg_scores_per_question_f.values,
        })
        df_avg_scores_f['Categoria'] = df_avg_scores_f['Domanda'].map(question_to_category_map).fillna("Senza Categoria")
        df_avg_scores_f.dropna(subset=['Punteggio Medio'], inplace=True)
        if not df_avg_scores_f.empty:
            # Uncategorised questions are left out of the min/max display.
            avg_scores_valid_cat = df_avg_scores_f[df_avg_scores_f['Categoria'] != "Senza Categoria"]
            if not avg_scores_valid_cat.empty:
                # Sorted ascending: first entry is the weakest category, last the strongest.
                avg_scores_per_category_f = avg_scores_valid_cat.groupby('Categoria')['Punteggio Medio'].mean().sort_values()
                if not avg_scores_per_category_f.empty:
                    min_cat_score = avg_scores_per_category_f.iloc[0]
                    max_cat_score = avg_scores_per_category_f.iloc[-1]
                    has_overall_avg = not np.isnan(avg_overall_filtered)
                    delta_min = f"{min_cat_score - avg_overall_filtered:.2f} vs Sod. Gen." if has_overall_avg else None
                    delta_max = f"{max_cat_score - avg_overall_filtered:.2f} vs Sod. Gen." if has_overall_avg else None
                    st.sidebar.metric("⚠️ Cat. Punteggio MIN", f"{avg_scores_per_category_f.index[0]} ({min_cat_score:.2f})", delta_min, delta_color="inverse")
                    st.sidebar.metric("✅ Cat. Punteggio MAX", f"{avg_scores_per_category_f.index[-1]} ({max_cat_score:.2f})", delta_max, delta_color="normal")
                else:
                    st.sidebar.text("N/D per Categorie (Vuote dopo agg.)")
            else:
                st.sidebar.text("N/D per Categorie (Solo 'Senza Cat.')")
        else:
            st.sidebar.text("N/D per Categorie (No medie domande)")
    else:
        st.sidebar.text("N/D per Categorie (No colonne numeriche)")

    # --- Driver data: correlation of each question with overall satisfaction ---
    if overall_is_usable:
        # A constant target column has no defined correlation.
        if df_filtered[overall_satisfaction_question].nunique(dropna=True) > 1:
            driver_candidate_cols = [
                col for col in numeric_cols_in_filtered
                if col != overall_satisfaction_question and df_filtered[col].nunique(dropna=True) > 1
            ]
            if driver_candidate_cols:
                try:
                    correlations = df_filtered[driver_candidate_cols].corrwith(
                        df_filtered[overall_satisfaction_question], method=corr_method_sidebar
                    ).dropna()
                    avg_scores_drivers = df_filtered[driver_candidate_cols].mean(skipna=True)
                    if not correlations.empty:
                        driver_df = pd.DataFrame({'Correlazione': correlations})
                        # Inner join keeps only questions that have both a correlation and an average.
                        driver_df = driver_df.join(avg_scores_drivers.rename('Punteggio Medio'), how='inner')
                        if not driver_df.empty:
                            driver_df['Categoria'] = driver_df.index.map(question_to_category_map).fillna("Senza Categoria")
                            driver_df.dropna(subset=['Categoria', 'Correlazione', 'Punteggio Medio'], inplace=True)
                            if not driver_df.empty:
                                driver_df['Domanda'] = driver_df.index
                                driver_df['Domanda_Breve'] = driver_df['Domanda'].apply(
                                    lambda x: str(x)[:47] + "..." if len(str(x)) > 50 else str(x)
                                )
                                driver_df['Correlazione_Abs'] = driver_df['Correlazione'].abs()
                        else:
                            driver_df = pd.DataFrame()  # Keep it plainly empty when the join yields nothing
                    else:
                        st.sidebar.info("Nessuna correlazione significativa calcolata per i driver.")
                except Exception as e:
                    st.sidebar.warning(f"Errore nel calcolo correlazioni driver: {e}")
            else:
                st.sidebar.info("Nessuna domanda candidata (con varianza) trovata per l'analisi driver.")
        else:
            st.sidebar.info("La domanda di soddisfazione generale non ha varianza nei dati filtrati.")
else:
    # No rows left after filtering, or no numeric questions at all.
    st.sidebar.text("Dati insufficienti o non disponibili per le metriche.")
    if total_respondents_filtered == 0:
        st.sidebar.text("Nessun rispondente selezionato.")
    st.sidebar.metric("Soddisfazione Generale Media", "N/D")
    st.sidebar.text("N/D per Categorie")

st.sidebar.divider()
st.sidebar.info("Utilizza i filtri per esplorare i dati. Le metriche e i grafici si aggiornano dinamicamente.")
# ============================================================================== | |
# --- Create Tabs --- | |
# ============================================================================== | |
tab_list = [
    "🎯 Sintesi Chiave",
    "🗺️ Mappa Domande",  # Category -> question map
    "👥 Demografia Dettagliata",
    "📊 Generale & Categorie",
    "🔍 Confronti & Driver",
    "📈 Grafici Avanzati",
]
tabs = st.tabs(tab_list)
# One named handle per tab, in the same order as tab_list.
tab_summary, tab_map, tab_demo, tab_overall, tab_comp, tab_advanced = tabs
# ============================================================================== | |
# --- TAB Summary: Key Takeaways --- | |
# ============================================================================== | |
with tab_summary:
    # Relies on the values computed in the sidebar section
    # (avg_overall_filtered, avg_scores_per_category_f, driver_df).
    st.header("🎯 Sintesi Chiave (Basata sui Filtri Correnti)")
    if df_filtered is None or df_filtered.empty:
        st.warning("Nessun dato disponibile con i filtri selezionati.")
    else:
        st.markdown(f"Analisi basata su **{total_respondents_filtered}** rispondenti.")
        col_s1, col_s2, col_s3 = st.columns([2, 1, 1])  # Wider first column for the text highlights

        with col_s1:
            st.subheader("Punti Salienti:")
            if not np.isnan(avg_overall_filtered):
                max_scale = response_scale[1] if response_scale else 6  # Fallback max scale
                st.markdown(f"- **Soddisfazione Generale:** {avg_overall_filtered:.2f} / {max_scale:.0f}")
            else:
                st.markdown("- **Soddisfazione Generale:** N/D")

            # The series is sorted ascending: last = strongest, first = weakest.
            if not avg_scores_per_category_f.empty:
                st.markdown(f"- **Area Più Forte:** {avg_scores_per_category_f.index[-1]} (Media: {avg_scores_per_category_f.iloc[-1]:.2f})")
                st.markdown(f"- **Area Più Debole:** {avg_scores_per_category_f.index[0]} (Media: {avg_scores_per_category_f.iloc[0]:.2f})")
            else:
                st.markdown("- Dati categorie non disponibili.")

            if not driver_df.empty:
                try:
                    # Question with the highest correlation to overall satisfaction.
                    top_driver = driver_df.sort_values('Correlazione', ascending=False).iloc[0]
                    st.markdown(f"- **Driver Positivo Principale:** {top_driver['Domanda_Breve']} (Corr: {top_driver['Correlazione']:.2f})")
                    # Improvement focus: above-average correlation, below-average score.
                    avg_corr_summary = driver_df['Correlazione'].mean()
                    avg_score_summary = driver_df['Punteggio Medio'].mean()
                    potential_improvement_df = driver_df[
                        (driver_df['Correlazione'] > avg_corr_summary)
                        & (driver_df['Punteggio Medio'] < avg_score_summary)
                    ]
                    if not potential_improvement_df.empty:
                        potential_improvement = potential_improvement_df.sort_values('Punteggio Medio').iloc[0]
                        st.markdown(f"- **Focus Miglioramento:** {potential_improvement['Domanda_Breve']} (Score: {potential_improvement['Punteggio Medio']:.2f}, Corr: {potential_improvement['Correlazione']:.2f})")
                    else:
                        st.markdown("- *Focus Miglioramento:* (Nessun driver critico trovato con medie correnti)")
                except IndexError:
                    st.markdown("- *Driver Principali:* (Errore nell'accesso ai dati driver)")
                except Exception as e:
                    st.markdown(f"- *Driver Principali:* (Errore: {e})")
            else:
                st.markdown("- *Driver Principali:* (Dati non disponibili o insufficienti)")

        with col_s2:
            st.subheader("Sentiment")
            if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns:
                overall_satisfaction_data_f = df_filtered[overall_satisfaction_question].dropna()
                if pd.api.types.is_numeric_dtype(overall_satisfaction_data_f) and not overall_satisfaction_data_f.empty:
                    bucket_counts = overall_satisfaction_data_f.apply(categorize_score).value_counts()
                    # Fixed bucket order; labels outside this list are dropped by the reindex.
                    bucket_counts = bucket_counts.reindex(list(BUCKET_COLORS.keys()) + ["Non Risposto"], fill_value=0)
                    bucket_perc = (bucket_counts / bucket_counts.sum() * 100) if bucket_counts.sum() > 0 else bucket_counts
                    plot_colors = BUCKET_COLORS.copy()
                    plot_colors["Non Risposto"] = "#bbbbbb"  # Grey for unanswered
                    fig_sentiment_pie = px.pie(
                        values=bucket_perc.values,
                        names=bucket_perc.index,
                        title="Distribuzione Sentiment",
                        hole=0.4,
                        color=bucket_perc.index,
                        color_discrete_map=plot_colors,
                        template=PLOTLY_TEMPLATE,
                    )
                    fig_sentiment_pie.update_traces(
                        textinfo='percent+label',
                        sort=False,  # Keep the bucket order defined above
                        pull=[0.05 if name == "Critico" else 0 for name in bucket_perc.index],
                    )
                    fig_sentiment_pie.update_layout(showlegend=False, margin=dict(t=30, b=10, l=10, r=10), height=250)
                    st.plotly_chart(fig_sentiment_pie, use_container_width=True)
                else:
                    st.write("Dati soddisfazione non numerici/vuoti.")
            else:
                st.write("Domanda soddisfazione non trovata.")

        with col_s3:
            st.subheader("Valore Medio")
            if not np.isnan(avg_overall_filtered):
                min_scale, max_scale = response_scale if response_scale else (1, 6)
                midpoint = (min_scale + max_scale) / 2
                # Clamp the colour-band edges into the detected scale so a narrow
                # scale cannot produce an inverted step range.
                low_edge = min(max(THRESHOLD_LOW, min_scale), max_scale)
                high_edge = min(max(THRESHOLD_HIGH, low_edge), max_scale)
                fig_gauge = go.Figure(go.Indicator(
                    mode="gauge+number",
                    value=avg_overall_filtered,
                    domain={'x': [0, 1], 'y': [0, 1]},
                    title={'text': "Soddisfazione Generale", 'font': {'size': 16}},
                    gauge={
                        'axis': {'range': [min_scale, max_scale], 'tickwidth': 1, 'tickcolor': "darkblue"},
                        'bar': {'color': "steelblue"},
                        'bgcolor': "white",
                        'borderwidth': 2,
                        'bordercolor': "gray",
                        'steps': [
                            {'range': [min_scale, low_edge], 'color': BUCKET_COLORS['Critico']},
                            {'range': [low_edge, high_edge], 'color': BUCKET_COLORS['Neutrale']},
                            {'range': [high_edge, max_scale], 'color': BUCKET_COLORS['Positivo']},
                        ],
                        'threshold': {
                            'line': {'color': "black", 'width': 3},
                            'thickness': 0.9,
                            'value': midpoint,  # Marks the scale midpoint
                        },
                    },
                ))
                fig_gauge.update_layout(height=250, margin=dict(t=40, b=10, l=10, r=10))
                st.plotly_chart(fig_gauge, use_container_width=True)
            else:
                st.write(" ")  # Vertical spacing placeholders
                st.write(" ")
                st.info("Gauge non disponibile (media N/D).")

        st.markdown("---")
        st.subheader("Riflessioni Rapide:")
        satisfaction_text = f"{avg_overall_filtered:.2f}" if not np.isnan(avg_overall_filtered) else "N/D"
        strongest_area_text = f"{avg_scores_per_category_f.index[-1]}" if not avg_scores_per_category_f.empty else "N/D"
        weakest_area_text = f"{avg_scores_per_category_f.index[0]}" if not avg_scores_per_category_f.empty else "N/D"
        st.info(f"""
        Questa sintesi evidenzia i risultati principali per il gruppo selezionato ({total_respondents_filtered} persone).
        La soddisfazione generale si attesta a **{satisfaction_text}**.
        Le aree di forza (**{strongest_area_text}**) e di debolezza (**{weakest_area_text}**)
        richiedono attenzione specifica. Esplora le altre schede per dettagli, confronti e visualizzazioni avanzate.
        """)
# ============================================================================== | |
# --- TAB Map: Category -> Question Mapping --- | |
# ============================================================================== | |
with tab_map:
    st.header("🗺️ Mappa Categorie e Domande")
    st.write("Questa sezione mostra quali domande appartengono a ciascuna categoria identificata durante il caricamento dei dati.")
    if question_to_category_map:
        # One row per question, sorted by category and then by question text.
        map_df = pd.DataFrame(list(question_to_category_map.items()), columns=['Domanda', 'Categoria'])
        map_df = map_df.sort_values(by=['Categoria', 'Domanda']).reset_index(drop=True)
        st.dataframe(map_df, use_container_width=True)

        # Same content, grouped into one expander per category.
        st.divider()
        st.subheader("Domande Raggruppate per Categoria")
        for category in sorted(map_df['Categoria'].unique()):
            with st.expander(f"**{category}**"):
                questions_in_cat = map_df.loc[map_df['Categoria'] == category, 'Domanda'].tolist()
                for q in questions_in_cat:
                    st.markdown(f"- {q}")
    else:
        st.warning("La mappa tra domande e categorie non è disponibile.")
# ============================================================================== | |
# --- TAB Demo: Demographics --- | |
# ============================================================================== | |
with tab_demo:
    st.header("👥 Analisi Demografica Dettagliata (Filtrata)")
    if df_filtered is None or df_filtered.empty:
        st.warning("Nessun dato disponibile con i filtri selezionati.")
    elif not demographic_cols:
        st.warning("Nessuna colonna demografica configurata per l'analisi.")
    else:
        st.write(f"Visualizzazione basata su **{len(df_filtered)}** rispondenti selezionati.")
        # Plot only the demographic columns actually present in the filtered data.
        valid_demo_cols_plots = [col for col in demographic_cols if col in df_filtered.columns]
        if not valid_demo_cols_plots:
            st.warning("Nessuna colonna demografica valida trovata nei dati filtrati per la visualizzazione.")
        else:
            # --- Basic distribution pies: one column per demographic ---
            st.subheader("Distribuzione Base")
            cols_pie = st.columns(len(valid_demo_cols_plots))
            pie_colors = [px.colors.qualitative.Pastel1, px.colors.qualitative.Pastel2, px.colors.qualitative.Set3]
            for i, demo_col in enumerate(valid_demo_cols_plots):
                with cols_pie[i]:
                    if df_filtered[demo_col].dropna().empty:
                        st.write(f"Dati '{demo_col}' non disponibili.")
                        continue
                    category_orders = {}
                    if 'Eta' in demo_col:
                        # Known age bands first, in natural order; anything else sorted after them.
                        age_order_guess = ['Fino a 30 anni', '31-40 anni', '41-50 anni', 'Oltre i 50 anni', 'Non specificato']
                        actual_ages = df_filtered[demo_col].unique()
                        ordered_actual = [age for age in age_order_guess if age in actual_ages]
                        ordered_actual.extend(sorted(age for age in actual_ages if age not in age_order_guess))
                        category_orders = {demo_col: ordered_actual}
                    fig_pie = px.pie(
                        df_filtered.dropna(subset=[demo_col]),
                        names=demo_col,
                        hole=0.4,
                        color_discrete_sequence=pie_colors[i % len(pie_colors)],  # Cycle through the palettes
                        template=PLOTLY_TEMPLATE,
                        title=f"Per {demo_col}",
                        category_orders=category_orders,
                    )
                    fig_pie.update_traces(textposition='inside', textinfo='percent+label')
                    fig_pie.update_layout(showlegend=False, title_x=0.5, margin=dict(t=40, b=0, l=0, r=0), height=300)
                    st.plotly_chart(fig_pie, use_container_width=True)

            st.markdown("---")

            # --- Hierarchical views: sunburst / treemap ---
            st.subheader("Visualizzazioni Gerarchiche/Proporzionali")
            if len(valid_demo_cols_plots) >= 2:  # A hierarchy needs at least two levels
                chart_type_hier = st.radio("Scegli tipo grafico gerarchico:", ["Sunburst", "Treemap"], horizontal=True, key="hier_chart_sel")
                try:
                    # Respondent count for every combination of demographic values.
                    df_grouped_hier = df_filtered.groupby(valid_demo_cols_plots, observed=True).size().reset_index(name='Conteggio')
                    if not df_grouped_hier.empty:
                        if chart_type_hier == "Sunburst":
                            make_chart = px.sunburst
                            hier_path = valid_demo_cols_plots
                        else:
                            make_chart = px.treemap
                            # The treemap gets a single root node above the first demographic.
                            hier_path = [px.Constant("Tutti")] + valid_demo_cols_plots
                        fig_hier = make_chart(
                            df_grouped_hier,
                            path=hier_path,
                            values='Conteggio',
                            title=f"Distribuzione Combinata ({chart_type_hier}): {', '.join(valid_demo_cols_plots)}",
                            template=PLOTLY_TEMPLATE,
                            color=valid_demo_cols_plots[0],  # Colour by the first demographic
                            color_discrete_sequence=px.colors.qualitative.Pastel,
                        )
                        fig_hier.update_layout(margin=dict(t=50, l=25, r=25, b=25))
                        st.plotly_chart(fig_hier, use_container_width=True)
                    else:
                        st.info("Nessun dato aggregato per la visualizzazione gerarchica.")
                except Exception as e:
                    st.error(f"Errore durante l'aggregazione per il grafico gerarchico: {e}")
            else:
                st.info("Sono necessarie almeno due colonne demografiche valide per le visualizzazioni gerarchiche.")
# ============================================================================== | |
# --- TAB Overall: Overall, Categories & Questions --- | |
# ============================================================================== | |
with tab_overall:
    # Tab content: overall satisfaction distribution, per-category averages,
    # and a per-question drill-down, all computed on the filtered data.
    st.header("📊 Analisi Generale, Categorie e Domande (Filtrata)")
    if df_filtered is None or df_filtered.empty:
        st.warning("Nessun dato disponibile con i filtri selezionati.")
    else:
        # Resolve the response scale once, with the same (1, 6) fallback the
        # bullet chart already used, so no chart in this tab crashes when
        # response_scale is missing.
        min_scale, max_scale = response_scale if response_scale else (1, 6)
        scale_span = max_scale - min_scale

        # --- Overall Satisfaction Distribution ---
        st.subheader("⭐ Soddisfazione Generale Complessiva")
        if overall_satisfaction_question and overall_satisfaction_question in df_filtered.columns:
            overall_satisfaction_data_f = df_filtered[overall_satisfaction_question].dropna()
            if pd.api.types.is_numeric_dtype(overall_satisfaction_data_f) and not overall_satisfaction_data_f.empty:
                col_ov1, col_ov2 = st.columns([2, 1])
                with col_ov1:
                    # Bar chart of the score distribution
                    overall_counts_f = overall_satisfaction_data_f.value_counts().sort_index()
                    fig_overall_satisfaction = px.bar(
                        overall_counts_f, x=overall_counts_f.index, y=overall_counts_f.values,
                        labels={'x': f'Punteggio ({min_scale:.0f}-{max_scale:.0f})', 'y': 'Numero Risposte'},
                        text_auto=True, color_discrete_sequence=px.colors.sequential.Blues_r, template=PLOTLY_TEMPLATE,
                        title="Distribuzione Punteggi Soddisfazione Generale")
                    fig_overall_satisfaction.update_layout(xaxis=dict(tickmode='linear', dtick=1), title_x=0.5)
                    st.plotly_chart(fig_overall_satisfaction, use_container_width=True)
                with col_ov2:
                    # Sentiment display (two blank writes act as vertical spacing)
                    st.write(" ")
                    st.write(" ")
                    st.write("**Distribuzione Sentiment:**")
                    bucket_counts = overall_satisfaction_data_f.apply(categorize_score).value_counts()
                    # Reindex so every bucket is present (zero-filled) and in a fixed order.
                    bucket_counts = bucket_counts.reindex(list(BUCKET_COLORS.keys()) + ["Non Risposto"], fill_value=0)
                    total_valid_responses = bucket_counts.sum()
                    if total_valid_responses > 0:
                        bucket_perc = (bucket_counts / total_valid_responses * 100)
                        plot_colors = BUCKET_COLORS.copy()
                        plot_colors["Non Risposto"] = "#bbbbbb"
                        # Every key of plot_colors exists in the reindexed series.
                        for bucket, bucket_color in plot_colors.items():
                            perc = bucket_perc[bucket]
                            count = bucket_counts[bucket]
                            st.markdown(f"<span style='color:{bucket_color}; font-size: 1.1em;'>■</span> **{bucket}:** {perc:.1f}% ({count})", unsafe_allow_html=True)
                    else:
                        st.write("Nessuna risposta valida per il sentiment.")
            else:
                st.warning("Dati soddisfazione generale non disponibili/numerici.")
        else:
            st.warning("Domanda soddisfazione generale non trovata.")
        st.markdown("---")

        # --- Category Averages ---
        st.subheader("📈 Punteggio Medio per Categoria")
        if not avg_scores_per_category_f.empty:
            cat_avg_chart_type = st.radio("Visualizza medie categorie come:", ["Bar Chart", "Bullet Chart"], horizontal=True, key="cat_avg_type")
            if cat_avg_chart_type == "Bar Chart":
                avg_scores_plot = avg_scores_per_category_f.copy()
                # Colour each bar by the threshold band its average falls into.
                color_map = [
                    BUCKET_COLORS["Positivo"] if score > THRESHOLD_HIGH
                    else BUCKET_COLORS["Critico"] if score < THRESHOLD_LOW
                    else BUCKET_COLORS["Neutrale"]
                    for score in avg_scores_plot.values
                ]
                fig_avg_category = go.Figure(go.Bar(
                    x=avg_scores_plot.values, y=avg_scores_plot.index, orientation='h',
                    text=[f'{score:.2f}' for score in avg_scores_plot.values], marker_color=color_map))
                fig_avg_category.update_traces(textposition='outside')
                fig_avg_category.update_layout(
                    xaxis_title=f'Punteggio Medio ({min_scale:.0f}-{max_scale:.0f})', yaxis_title='Categoria',
                    yaxis={'categoryorder': 'total ascending'}, template=PLOTLY_TEMPLATE, title="Medie Categorie (Colorate per Soglia)")
                if not np.isnan(avg_overall_filtered):
                    fig_avg_category.add_vline(x=avg_overall_filtered, line_width=2, line_dash="dash", line_color="grey", annotation_text="Media Sod. Gen.")
                st.plotly_chart(fig_avg_category, use_container_width=True)
            elif cat_avg_chart_type == "Bullet Chart":
                st.write("Grafico Bullet: Confronta la media di categoria con la media generale e le soglie.")
                avg_scores_plot = avg_scores_per_category_f.copy().sort_values(ascending=False)
                for category, score in avg_scores_plot.items():
                    fig_bullet = go.Figure(go.Indicator(
                        mode="gauge+number+delta",
                        value=score,
                        delta={'reference': avg_overall_filtered, 'suffix': ' vs Media Gen.'} if not np.isnan(avg_overall_filtered) else None,
                        title={'text': category, 'font': {'size': 14}},
                        gauge={
                            'shape': "bullet",
                            'axis': {'range': [min_scale, max_scale]},
                            # Marker at the overall average; mid-scale when it is unavailable.
                            'threshold': {
                                'line': {'color': "black", 'width': 2},
                                'thickness': 0.75,
                                'value': avg_overall_filtered if not np.isnan(avg_overall_filtered) else (min_scale + max_scale) / 2},
                            'bgcolor': "white",
                            'steps': [
                                {'range': [min_scale, THRESHOLD_LOW], 'color': BUCKET_COLORS['Critico']},
                                {'range': [THRESHOLD_LOW, THRESHOLD_HIGH], 'color': BUCKET_COLORS['Neutrale']},
                                {'range': [THRESHOLD_HIGH, max_scale], 'color': BUCKET_COLORS['Positivo']}],
                            'bar': {'color': 'darkblue', 'thickness': 0.5}
                        }))
                    fig_bullet.update_layout(height=100, margin=dict(l=200, r=50, t=30, b=10))
                    st.plotly_chart(fig_bullet, use_container_width=True)
        else:
            st.warning("Impossibile calcolare medie per categoria (potrebbero essere tutte 'Senza Categoria' o vuote).")
        st.markdown("---")

        # --- Detailed Question Analysis ---
        st.subheader("❓ Analisi Dettagliata per Domanda")
        # Get categories present in the calculated averages
        categories_with_averages = avg_scores_per_category_f.index.unique().tolist()
        if not categories_with_averages:
            # Fallback: take categories from the question map, minus placeholder labels.
            if question_to_category_map:
                categories_with_averages = sorted(set(question_to_category_map.values()))
                for placeholder in ("Senza Categoria", "Categoria Sconosciuta"):
                    if placeholder in categories_with_averages:
                        categories_with_averages.remove(placeholder)
            else:
                categories_with_averages = []
        if categories_with_averages:  # Proceed only if there are valid categories
            col_q1, col_q2 = st.columns([1, 1])
            with col_q1:
                selected_category = st.selectbox("Seleziona Categoria:", options=categories_with_averages, key="cat_select_q")
            with col_q2:
                plot_type = st.radio("Tipo Grafico Domande:", ["Distribuzione % (Stacked)", "Conteggi (Bar)", "Box Plot"], horizontal=True, key="q_plot_type")
            if selected_category:
                st.write(f"**Dettaglio Domande: '{selected_category}'**")
                # Questions mapped to the selected category that exist in the data and are numeric
                questions_in_category = [q for q, cat in question_to_category_map.items()
                                         if cat == selected_category and q in df_filtered.columns and q in numeric_question_cols]
                if not questions_in_category:
                    st.write("Nessuna domanda numerica valida trovata per questa categoria nei dati filtrati.")
                elif plot_type == "Box Plot":
                    df_box_cat = df_filtered[questions_in_category].copy()
                    if not df_box_cat.empty:
                        df_box_melted = df_box_cat.melt(var_name='Domanda', value_name='Punteggio')
                        # Shorten question names for the y-axis
                        df_box_melted['Domanda_Breve'] = df_box_melted['Domanda'].apply(lambda x: x[:67] + "..." if len(x) > 70 else x)
                        df_box_melted.dropna(subset=['Punteggio'], inplace=True)
                        if not df_box_melted.empty:
                            fig_box = px.box(df_box_melted, x='Punteggio', y='Domanda_Breve', orientation='h',
                                             title=f"Distribuzione Punteggi per Domanda in '{selected_category}'",
                                             template=PLOTLY_TEMPLATE, points=False)  # points="all" can be noisy
                            # Height grows with the number of questions.
                            fig_box.update_layout(yaxis={'categoryorder': 'total descending'}, height=max(400, len(questions_in_category) * 50))
                            st.plotly_chart(fig_box, use_container_width=True)
                        else:
                            st.warning("Nessun dato valido per il Box Plot dopo il dropna.")
                    else:
                        st.warning("DataFrame vuoto per il Box Plot.")
                else:  # Stacked or Counts Bar Chart: one small chart per question
                    for question in questions_in_category:
                        question_data_f = df_filtered[question].dropna()
                        if pd.api.types.is_numeric_dtype(question_data_f) and not question_data_f.empty:
                            avg_q = question_data_f.mean()
                            q_display = question if len(question) < 100 else question[:97] + "..."
                            st.markdown(f"**{q_display}** (Media: {avg_q:.2f})")
                            if plot_type == "Conteggi (Bar)":
                                counts_q = question_data_f.value_counts().sort_index()
                                if not counts_q.empty:
                                    fig_q = px.bar(counts_q, x=counts_q.index, y=counts_q.values,
                                                   labels={'x': 'Punteggio', 'y': 'Numero Risposte'}, text_auto='.2s',
                                                   height=250, template=PLOTLY_TEMPLATE, color_discrete_sequence=px.colors.sequential.Blues_r)
                                    fig_q.update_layout(xaxis=dict(tickmode='linear', dtick=1), margin=dict(t=5, b=5, l=5, r=5))
                                    st.plotly_chart(fig_q, use_container_width=True)
                                else:
                                    st.caption("Nessun dato per questo grafico.")
                            elif plot_type == "Distribuzione % (Stacked)":
                                counts_q_norm = question_data_f.value_counts(normalize=True).sort_index() * 100
                                if not counts_q_norm.empty:
                                    counts_q_df = counts_q_norm.reset_index()
                                    counts_q_df.columns = ['Punteggio', 'Percentuale']
                                    counts_q_df['Punteggio'] = counts_q_df['Punteggio'].astype(str)  # For discrete colors
                                    # Key the colour map by the exact string labels stored in
                                    # 'Punteggio'. Re-deriving keys via float() would give "3.0"
                                    # for an integer column labelled "3", and the map would
                                    # silently never match.
                                    colors = px.colors.sequential.Blues_r
                                    score_color_map = {}
                                    for score_label in counts_q_df['Punteggio']:
                                        if scale_span > 0:
                                            color_idx = int((float(score_label) - min_scale) / scale_span * len(colors))
                                        else:
                                            color_idx = 0  # Degenerate scale: avoid division by zero
                                        # Clamp on both sides so out-of-scale scores cannot
                                        # wrap around via a negative list index.
                                        score_color_map[score_label] = colors[max(0, min(len(colors) - 1, color_idx))]
                                    fig_q = px.bar(counts_q_df, x='Percentuale', y=[' '] * len(counts_q_df),  # Single bar
                                                   color='Punteggio', orientation='h',
                                                   text=[f"{p:.1f}%" for p in counts_q_df['Percentuale']],
                                                   height=150, template=PLOTLY_TEMPLATE,
                                                   color_discrete_map=score_color_map  # Apply color map
                                                   )
                                    fig_q.update_layout(xaxis_ticksuffix="%", yaxis_title="", xaxis_title="% Rispondenti",
                                                        legend_title="Punteggio", showlegend=True, margin=dict(t=5, b=5, l=5, r=5),
                                                        xaxis_range=[0, 100], yaxis_visible=False,
                                                        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
                                    fig_q.update_traces(textposition='inside', textfont_color='white')  # Ensure text is visible
                                    st.plotly_chart(fig_q, use_container_width=True)
                                else:
                                    st.caption("Nessun dato per questo grafico.")
                        else:
                            st.caption(f"Dati per '{question[:50]}...' non numerici o vuoti.")
        else:
            st.info("Nessuna categoria valida trovata per l'analisi dettagliata delle domande.")
# ============================================================================== | |
# --- TAB Comparisons: Comparisons, Drivers & More --- | |
# ============================================================================== | |
with tab_comp:
    # Tab content: demographic distribution comparisons, driver analysis, and
    # Z-score based attention points with suggestions, all on the filtered data.
    st.header("🔍 Confronti Demografici & Analisi Driver (Filtrata)")
    if df_filtered is None or df_filtered.empty:
        st.warning("Nessun dato disponibile con i filtri selezionati.")
    elif not numeric_question_cols:
        st.warning("Nessuna domanda numerica trovata per le analisi di confronto.")
    else:
        # --- Prepare Melted Data ---
        # NOTE: no caching is applied here; the melt is recomputed on every rerun.
        def get_melted_data(df, id_vars, value_vars, cat_map):
            """Reshape question columns into long format, one row per answer.

            Args:
                df: Source DataFrame.
                id_vars: Columns to keep as identifiers (the demographic columns).
                value_vars: Question columns to unpivot.
                cat_map: Mapping from question name to category name.

            Returns:
                A DataFrame with the identifier columns plus 'Domanda',
                'Punteggio' and 'Categoria', without rows whose 'Punteggio'
                is missing. An empty DataFrame is returned when no identifier
                or no value column is present in ``df``.
            """
            if not value_vars:
                return pd.DataFrame()
            cols_to_melt = [col for col in id_vars + value_vars if col in df.columns]
            value_vars_valid = [col for col in value_vars if col in cols_to_melt]
            id_vars_valid = [col for col in id_vars if col in cols_to_melt]
            if not value_vars_valid or not id_vars_valid:
                return pd.DataFrame()  # Need both ID and Value vars
            df_melted = df[cols_to_melt].melt(id_vars=id_vars_valid, value_vars=value_vars_valid, var_name='Domanda', value_name='Punteggio')
            # Questions missing from the map fall under a placeholder category.
            df_melted['Categoria'] = df_melted['Domanda'].map(cat_map).fillna("Senza Categoria")
            df_melted.dropna(subset=['Punteggio'], inplace=True)
            return df_melted

        numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns]
        valid_demographic_cols = [col for col in demographic_cols if col in df_filtered.columns]
        df_melted_f = pd.DataFrame()  # Initialize empty
        if valid_demographic_cols and numeric_cols_in_filtered:
            df_melted_f = get_melted_data(df_filtered, valid_demographic_cols, numeric_cols_in_filtered, question_to_category_map)

        # --- Demographic Comparisons (Violin / Box Plots) ---
        st.subheader("🎻 Confronti Demografici (Distribuzione Punteggi per Categoria)")
        if not df_melted_f.empty and valid_demographic_cols:
            col_comp1, col_comp2 = st.columns(2)
            with col_comp1:
                # Only demographic columns with more than one distinct value can be compared.
                comparison_group_v_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1]
                if comparison_group_v_options:
                    comparison_group_v = st.selectbox("Confronta Distribuzioni per:", comparison_group_v_options, key="dist_group")
                else:
                    comparison_group_v = None
                    st.info("Nessuna colonna demografica con valori multipli per il confronto.")
            with col_comp2:
                dist_plot_type = st.radio("Tipo Grafico Distribuzione:", ["Violin Plot", "Box Plot"], horizontal=True, key="dist_plot_type")
            if comparison_group_v:  # Proceed only if a valid comparison group is selected
                # Categories offered are those that have a computed average.
                categories_with_averages = avg_scores_per_category_f.index.unique().tolist()
                if categories_with_averages:
                    # Default to the three lowest-scoring categories.
                    default_cats_dist = avg_scores_per_category_f.nsmallest(3).index.tolist()
                    default_cats_dist = [cat for cat in default_cats_dist if cat in categories_with_averages]  # Ensure defaults are valid
                    selected_cats_dist = st.multiselect("Seleziona Categorie da Visualizzare:", options=categories_with_averages, default=default_cats_dist, key="cat_dist")
                    if selected_cats_dist:
                        # Keep selected categories; drop rows with a missing or 'Non specificato' group.
                        df_dist = df_melted_f[(df_melted_f['Categoria'].isin(selected_cats_dist)) &
                                              (df_melted_f[comparison_group_v].notna()) &
                                              (df_melted_f[comparison_group_v] != 'Non specificato')]
                        if not df_dist.empty:
                            # Ensure hover data columns exist
                            hover_data = [col for col in valid_demographic_cols if col in df_dist.columns]
                            plot_func = px.violin if dist_plot_type == "Violin Plot" else px.box
                            caption_text = ("Il grafico a violino mostra la densità della distribuzione..." if dist_plot_type == "Violin Plot"
                                            else "Il box plot mostra mediana, quartili...")
                            fig_dist = plot_func(df_dist, x='Categoria', y='Punteggio', color=comparison_group_v,
                                                 points=False,  # 'all', False, 'outliers'
                                                 hover_data=hover_data,
                                                 category_orders={'Categoria': selected_cats_dist},  # Use selected order
                                                 template=PLOTLY_TEMPLATE, title=f"Distribuzione Punteggi per {comparison_group_v}")
                            fig_dist.update_layout(yaxis_range=[response_scale[0] - 0.5, response_scale[1] + 0.5])
                            st.plotly_chart(fig_dist, use_container_width=True)
                            st.caption(caption_text)
                        else:
                            st.warning(f"Nessun dato per le categorie e gruppo '{comparison_group_v}' selezionati.")
                    else:
                        st.info("Seleziona almeno una categoria per visualizzare il confronto.")
                else:
                    st.warning("Medie per categoria non disponibili.")
        else:
            st.info("Dati o colonne demografiche insufficienti per i confronti.")
        st.markdown("---")

        # --- Driver Analysis ---
        st.subheader("🎯 Analisi Driver (Impatto vs Performance)")
        if not driver_df.empty:  # driver_df is computed outside this block
            driver_plot_type = st.radio("Visualizza Analisi Driver come:", ["Scatter Plot", "Density Heatmap", "Bar Chart (Top/Bottom)"], horizontal=True, key="driver_plot_type")
            corr_label = corr_method_sidebar.capitalize()
            if driver_plot_type == "Scatter Plot":
                fig_scatter_drivers = px.scatter(driver_df, x='Punteggio Medio', y='Correlazione',
                                                 color='Categoria',
                                                 size='Correlazione_Abs', size_max=18,
                                                 hover_data=['Domanda_Breve', 'Punteggio Medio', 'Correlazione'],
                                                 template=PLOTLY_TEMPLATE, title=f"Driver: Impatto (Corr. {corr_label}) vs Performance")
                # Quadrant lines are drawn at the mean correlation and mean score.
                avg_corr = driver_df['Correlazione'].mean()
                avg_score_all_q = driver_df['Punteggio Medio'].mean()
                fig_scatter_drivers.add_vline(x=avg_score_all_q, line_width=1, line_dash="dash", line_color="grey", annotation_text="Media Perf.")
                fig_scatter_drivers.add_hline(y=avg_corr, line_width=1, line_dash="dash", line_color="grey", annotation_text="Media Impatto")
                fig_scatter_drivers.update_layout(xaxis_title="Performance (Punteggio Medio Domanda)", yaxis_title=f"Impatto (Corr. {corr_label} con Sod. Gen.)")
                st.plotly_chart(fig_scatter_drivers, use_container_width=True)
                st.caption("Quadranti (vs medie): Alto Dx (Verde)=Forza Chiave; Alto Sx (Giallo)=Priorità Alta; Basso Sx (Rosso)=Priorità Bassa; Basso Dx (Blu)=Mantenimento Secondario. Dimensione = forza correlazione.")
            elif driver_plot_type == "Density Heatmap":
                fig_density_driver = px.density_heatmap(driver_df, x="Punteggio Medio", y="Correlazione",
                                                        marginal_x="histogram", marginal_y="histogram",
                                                        text_auto=False,
                                                        template=PLOTLY_TEMPLATE, title=f"Densità Driver: Impatto (Corr. {corr_label}) vs Performance")
                avg_corr = driver_df['Correlazione'].mean()
                avg_score_all_q = driver_df['Punteggio Medio'].mean()
                fig_density_driver.add_vline(x=avg_score_all_q, line_width=1, line_dash="dash", line_color="grey")
                fig_density_driver.add_hline(y=avg_corr, line_width=1, line_dash="dash", line_color="grey")
                fig_density_driver.update_layout(xaxis_title="Performance (Punteggio Medio Domanda)", yaxis_title=f"Impatto (Corr. {corr_label} con Sod. Gen.)")
                st.plotly_chart(fig_density_driver, use_container_width=True)
                st.caption("Mostra dove si concentrano le domande nel piano Impatto-Performance.")
            elif driver_plot_type == "Bar Chart (Top/Bottom)":
                top_n = st.slider("Numero Top/Bottom Driver da mostrare:", min_value=3, max_value=15, value=8, key="driver_topn")
                driver_df_unique = driver_df.loc[~driver_df.index.duplicated(keep='first')]
                top_drivers = driver_df_unique.sort_values('Correlazione', ascending=False).head(top_n)
                bottom_drivers = driver_df_unique.sort_values('Correlazione', ascending=True).head(top_n)  # Gets most negative
                # In small datasets the same question can be in both selections; keep it once.
                drivers_to_plot = pd.concat([top_drivers, bottom_drivers]).drop_duplicates().sort_values('Correlazione')
                if not drivers_to_plot.empty:
                    fig_drivers_bar = px.bar(drivers_to_plot, x='Correlazione', y='Domanda_Breve', orientation='h',
                                             color='Categoria', template=PLOTLY_TEMPLATE, height=max(400, len(drivers_to_plot) * 30),
                                             title=f"Top/Bottom {top_n} Domande per Correlazione ({corr_label}) con Sod. Gen.")
                    fig_drivers_bar.update_layout(yaxis={'categoryorder': 'total ascending'}, xaxis_title=f"Correlazione {corr_label}", yaxis_title="Domanda")
                    st.plotly_chart(fig_drivers_bar, use_container_width=True)
                    st.caption(f"Mostra le domande con la correlazione ({corr_method_sidebar}) più forte (positiva e negativa) con la soddisfazione generale.")
                else:
                    st.warning("Nessun dato driver da mostrare nel grafico a barre.")
        else:
            st.warning("Impossibile calcolare l'analisi dei driver. Verifica la presenza e la varianza della domanda di soddisfazione generale e delle altre domande numeriche.")
        st.markdown("---")

        # --- Anomaly Detection & Recommendations ---
        st.subheader("⚠️ Rilevamento Potenziali Punti d'Attenzione & Suggerimenti 💡")
        if not df_melted_f.empty and valid_demographic_cols and not avg_scores_per_category_f.empty:
            col_anom, col_sugg = st.columns(2)
            with col_anom:
                st.write("**Possibili Punti d'Attenzione (Z-Score per Gruppo/Categoria):**")
                # Pre-bind so the KeyError handler can tell whether the merge was
                # reached; otherwise a KeyError raised before the merge would turn
                # into a NameError inside the handler.
                merged_stats = None
                try:
                    # Category-level mean and std on the filtered dataset
                    overall_cat_stats = df_melted_f.groupby('Categoria')['Punteggio'].agg(['mean', 'std']).reset_index()
                    overall_cat_stats = overall_cat_stats.rename(columns={'mean': 'mean_overall', 'std': 'std_overall'})
                    # Mean per demographic combination and category
                    group_means = df_melted_f.groupby(valid_demographic_cols + ['Categoria'], observed=True)['Punteggio'].mean().reset_index()
                    group_means = group_means.rename(columns={'Punteggio': 'mean_group'})
                    if not group_means.empty and not overall_cat_stats.empty:
                        merged_stats = pd.merge(group_means, overall_cat_stats, on='Categoria', how='left')
                        # A Z-score needs a defined, non-negligible standard deviation.
                        merged_stats_valid_std = merged_stats[merged_stats['std_overall'].notna() & (merged_stats['std_overall'] > 0.01)].copy()  # copy avoids SettingWithCopyWarning
                        if not merged_stats_valid_std.empty:
                            merged_stats_valid_std['Z_Score'] = (merged_stats_valid_std['mean_group'] - merged_stats_valid_std['mean_overall']) / merged_stats_valid_std['std_overall']
                            z_score_threshold = st.slider("Soglia Z-Score per Attenzione:", min_value=1.0, max_value=3.0, value=1.75, step=0.25, key="zscore_thresh")
                            potential_anomalies = merged_stats_valid_std[merged_stats_valid_std['Z_Score'].abs() > z_score_threshold].sort_values(by='Z_Score')
                            if not potential_anomalies.empty:
                                st.write(f"Gruppi/Categorie con punteggio medio deviante (> {z_score_threshold:.2f} dev. std. dalla media della categoria):")
                                for _, row in potential_anomalies.head(10).iterrows():  # Limit display
                                    group_desc = " / ".join(f"{col}={row[col]}" for col in valid_demographic_cols)
                                    direction = "⚠️ Basso" if row['Z_Score'] < 0 else "✅ Alto"
                                    st.markdown(f"- {direction}: **{group_desc}** in **'{row['Categoria']}'** (Media Gruppo: {row['mean_group']:.2f}, Z: {row['Z_Score']:.2f})")
                            else:
                                st.info(f"Nessun punto d'attenzione rilevato con soglia Z-Score > {z_score_threshold:.2f} nei dati filtrati.")
                        else:
                            st.info("Deviazione standard non calcolabile o nulla per le categorie, impossibile calcolare Z-score.")
                    else:
                        st.info("Dati insufficienti per calcolare medie di gruppo o statistiche di categoria.")
                except KeyError as e:
                    st.error(f"Errore Chiave durante il calcolo Z-Score: '{e}'. Verifica i nomi delle colonne dopo il merge.")
                    if merged_stats is not None:
                        st.dataframe(merged_stats.head())  # Show merged frame head for debugging
                except Exception as e:
                    st.error(f"Errore generico durante il calcolo Z-Score: {e}")
            with col_sugg:
                st.write("**Suggerimenti Basati sui Driver & Punteggi Bassi:**")
                # Pick the weakest category by value, not by position, so the
                # result does not depend on how the averages happen to be sorted.
                cat_means_valid = avg_scores_per_category_f.dropna()
                if not cat_means_valid.empty:
                    lowest_cat_name = cat_means_valid.idxmin()
                    lowest_cat_score = cat_means_valid.min()
                    st.markdown(f"**Area più debole (media bassa):** '{lowest_cat_name}' ({lowest_cat_score:.2f}).")
                    if not driver_df.empty:
                        avg_corr = driver_df['Correlazione'].mean()
                        avg_score_all_q = driver_df['Punteggio Medio'].mean()
                        low_score_threshold = avg_score_all_q
                        high_impact_threshold = avg_corr
                        # High priority: below-average score with above-average correlation.
                        critical_drivers = driver_df[
                            (driver_df['Punteggio Medio'] < low_score_threshold) &
                            (driver_df['Correlazione'] > high_impact_threshold)
                        ].sort_values('Correlazione', ascending=False)
                        if not critical_drivers.empty:
                            st.markdown("**Priorità Alte (Bassa Performance, Alto Impatto):**")
                            for _, row in critical_drivers.head(5).iterrows():
                                st.markdown(f"- *{row['Domanda_Breve']}* (Cat: {row['Categoria']}, Score: {row['Punteggio Medio']:.2f}, Corr: {row['Correlazione']:.2f})")
                            st.warning("Intervenire su queste domande potrebbe avere il maggior impatto positivo sulla soddisfazione generale.")
                        else:
                            st.info("Nessuna domanda trovata nel quadrante 'Priorità Alte' con le soglie attuali.")
                    # Generic suggestions
                    suggestions = {
                        "Stress e benessere": "Considerare iniziative per la gestione dello stress, flessibilità lavorativa, e supporto psicologico.",
                        # ... (rest of suggestions map) ...
                        "Apertura e inclusione": "Programmi D&I, garantire libertà di espressione e sicurezza psicologica."
                    }
                    default_suggestion = "Approfondire le cause specifiche tramite focus group o interviste mirate."
                    st.markdown("**Possibili Azioni Generiche per l'Area più Debole:**")
                    st.info(suggestions.get(lowest_cat_name, default_suggestion))
                else:
                    st.write("Nessun dato medio per categoria disponibile per generare suggerimenti.")
        else:
            st.info("Dati insufficienti per rilevare anomalie o fornire suggerimenti.")
# ============================================================================== | |
# --- TAB Advanced: More Complex Visualizations --- | |
# ============================================================================== | |
with tab_advanced: | |
st.header("📈 Grafici Avanzati (Filtrati)") | |
if df_filtered is None or df_filtered.empty: | |
st.warning("Nessun dato disponibile con i filtri selezionati.") | |
elif not numeric_question_cols: | |
st.warning("Nessuna domanda numerica trovata per le analisi avanzate.") | |
else: | |
# Use the melted data prepared in the Comparisons tab if available | |
if 'df_melted_f' not in locals() or df_melted_f.empty: | |
# Try to recreate df_melted_f if not available | |
numeric_cols_in_filtered = [col for col in numeric_question_cols if col in df_filtered.columns] | |
valid_demographic_cols = [col for col in demographic_cols if col in df_filtered.columns] | |
if valid_demographic_cols and numeric_cols_in_filtered: | |
df_melted_f = get_melted_data(df_filtered, valid_demographic_cols, numeric_cols_in_filtered, question_to_category_map) | |
else: | |
df_melted_f = pd.DataFrame() | |
if df_melted_f.empty and not numeric_cols_in_filtered: # Check again if still empty or no numerics | |
st.warning("Dati insufficienti per i grafici avanzati.") | |
else: | |
# --- 1. Correlation Heatmap ---
# Shows the pairwise correlation between numeric questions of the filtered data.
st.subheader("🔥 Heatmap di Correlazione tra Domande Numeriche")
corr_method_options = ['pearson']
if SCIPY_AVAILABLE:
    # Spearman is only offered when scipy could be imported.
    corr_method_options.append('spearman')
corr_method_adv = st.radio("Metodo Correlazione:", corr_method_options, horizontal=True, key="corr_method_adv")
# Keep only questions with more than one distinct non-null value:
# a constant column has no defined correlation.
numeric_cols_in_filtered_adv = [
    col for col in numeric_question_cols
    if col in df_filtered.columns and df_filtered[col].nunique(dropna=True) > 1
]
if len(numeric_cols_in_filtered_adv) > 1:
    # Unique, readable axis labels: truncate long question text and append the
    # positional index so two truncated labels can never collide.
    corr_labels = {
        q: (f"{str(q)[:27]}..." if len(str(q)) > 30 else str(q)) + f" [{i}]"
        for i, q in enumerate(numeric_cols_in_filtered_adv)
    }
    df_corr = df_filtered[numeric_cols_in_filtered_adv].rename(columns=corr_labels)
    try:
        corr_matrix = df_corr.corr(method=corr_method_adv)
        if not corr_matrix.empty:
            fig_heatmap = px.imshow(
                corr_matrix,
                text_auto=".2f",
                aspect="auto",
                color_continuous_scale='RdBu_r',
                range_color=[-1, 1],
                template=PLOTLY_TEMPLATE,
                title=f"Heatmap Correlazione ({corr_method_adv.capitalize()}) tra Domande"
            )
            # Grow the figure with the number of questions, never below 600px.
            heatmap_height = max(600, len(numeric_cols_in_filtered_adv) * 20)
            fig_heatmap.update_layout(height=heatmap_height, xaxis_tickangle=-45)
            st.plotly_chart(fig_heatmap, use_container_width=True)
            # 'RdBu_r' is the reversed red-blue scale: the low end (-1) is blue
            # and the high end (+1) is red, so the caption must say so.
            st.caption("Rosso = correlazione positiva, Blu = correlazione negativa.")
        else:
            st.warning("Matrice di correlazione vuota.")
    except Exception as e:
        # UI boundary: report the problem instead of aborting the whole page.
        st.warning(f"Errore nel calcolo heatmap: {e}")
else:
    st.info("Servono almeno due domande numeriche con varianza per la heatmap.")
st.markdown("---")
# --- 2. Radar Chart ---
# Compares the mean score per category across the selected demographic groups.
st.subheader("🕸️ Radar Chart: Confronto Medie Categorie per Gruppo Demografico")
if not avg_scores_per_category_f.empty and valid_demographic_cols and not df_melted_f.empty:
    # A comparison needs a demographic column with more than one distinct value.
    radar_demo_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1]
    if radar_demo_options:
        radar_demo_col = st.selectbox("Seleziona Gruppo Demografico per Confronto Radar:", radar_demo_options, key="radar_demo")
        # Groups are offered as strings, without the 'Non specificato' placeholder.
        available_groups = sorted(df_filtered[radar_demo_col].astype(str).unique())
        available_groups = [g for g in available_groups if g != 'Non specificato']
        if len(available_groups) > 1:
            groups_to_compare = st.multiselect(f"Seleziona '{radar_demo_col}' da confrontare:", options=available_groups, default=available_groups[:min(len(available_groups), 3)], key="radar_groups")
            if groups_to_compare:
                # The selectable labels are strings, so match and group on the
                # string form of the column. Comparing against the raw values
                # would never match for non-string columns (e.g. numeric codes)
                # and the chart would always come out empty.
                group_labels = df_melted_f[radar_demo_col].astype(str)
                selected_rows = group_labels.isin(groups_to_compare)
                radar_data = df_melted_f[selected_rows].copy()
                radar_data[radar_demo_col] = group_labels[selected_rows]
                avg_radar = radar_data.groupby(['Categoria', radar_demo_col], observed=True)['Punteggio'].mean().unstack()
                avg_radar = avg_radar.dropna(axis=0, how='all')  # Drop categories with no data at all
                if not avg_radar.empty:
                    categories_radar = avg_radar.index.tolist()
                    fig_radar = go.Figure()
                    color_sequence = px.colors.qualitative.Plotly
                    for i, group in enumerate(groups_to_compare):
                        # A selected group may have no rows after aggregation.
                        if group in avg_radar.columns:
                            fig_radar.add_trace(go.Scatterpolar(
                                r=avg_radar[group].values, theta=categories_radar, fill='toself', name=str(group),
                                line_color=color_sequence[i % len(color_sequence)]  # Cycle through the palette
                            ))
                    # Radial axis spans the response scale (1-6 if unknown) with a small margin.
                    min_scale_radar, max_scale_radar = response_scale if response_scale else (1, 6)
                    fig_radar.update_layout(
                        polar=dict(radialaxis=dict(visible=True, range=[min_scale_radar - 0.5, max_scale_radar + 0.5])),
                        showlegend=True, title=f"Confronto Medie Categorie Radar per {radar_demo_col}", template=PLOTLY_TEMPLATE)
                    st.plotly_chart(fig_radar, use_container_width=True)
                else:
                    st.warning("Nessun dato medio disponibile per i gruppi selezionati.")
            else:
                st.info("Seleziona almeno un gruppo.")
        else:
            st.info(f"Solo un gruppo disponibile in '{radar_demo_col}'.")
    else:
        st.info("Nessuna colonna demografica con valori multipli per il confronto Radar.")
else:
    st.info("Dati insufficienti (medie categorie, demo, melted) per il grafico Radar.")
st.markdown("---")
# --- 3. Parallel Coordinates Plot ---
# One line per demographic group, one axis per selected category (mean score).
import html  # local import: escapes group names before they are placed in the HTML legend

st.subheader("|| Parallel Coordinates: Pattern Medie Categorie per Gruppo")
st.warning("Attenzione: Questo grafico può essere lento o illeggibile con molti dati/categorie.")
if not avg_scores_per_category_f.empty and valid_demographic_cols and not df_melted_f.empty:
    cats_parallel_options = avg_scores_per_category_f.index.unique().tolist()
    if cats_parallel_options:
        # Pre-select at most the first 8 categories.
        default_cats_parallel = cats_parallel_options[:min(len(cats_parallel_options), 8)]
        cats_parallel = st.multiselect("Seleziona Categorie (Dimensioni):", cats_parallel_options, default=default_cats_parallel, key="par_cats")
        if cats_parallel:
            parallel_demo_options = [col for col in valid_demographic_cols if df_filtered[col].nunique() > 1]
            if parallel_demo_options:
                parallel_demo_col = st.selectbox("Colora Linee per Gruppo Demografico:", parallel_demo_options, key="par_demo")
                # Mean score per (group, category), pivoted so each category is a column.
                df_parallel_prep = df_melted_f[df_melted_f['Categoria'].isin(cats_parallel)]
                df_parallel = df_parallel_prep.groupby([parallel_demo_col, 'Categoria'], observed=True)['Punteggio'].mean().unstack()
                # Groups missing any selected category are dropped.
                df_parallel = df_parallel.dropna().reset_index()
                if not df_parallel.empty and parallel_demo_col in df_parallel.columns:
                    # Parcoords colours lines by a numeric value: map each group to an integer index.
                    unique_groups_par = df_parallel[parallel_demo_col].unique()
                    group_map = {name: i for i, name in enumerate(unique_groups_par)}
                    df_parallel['color_val'] = df_parallel[parallel_demo_col].map(group_map)
                    axis_range = [response_scale[0], response_scale[1]] if response_scale else [1, 6]
                    dimensions = []
                    for cat in cats_parallel:
                        if cat in df_parallel.columns:
                            dimensions.append(dict(
                                range=axis_range,
                                label=str(cat)[:20] + '...' if len(str(cat)) > 20 else str(cat),
                                values=df_parallel[cat]))
                    if dimensions:
                        color_palette_par = px.colors.qualitative.Plotly
                        # One colour per group, cycling through the palette.
                        n_groups = len(group_map)
                        group_colors = [color_palette_par[i % len(color_palette_par)] for i in range(n_groups)]
                        # Build a colorscale with exactly one stop per group and pin
                        # cmin/cmax to the index range, so index i lands exactly on
                        # stop i. Passing the raw palette would spread its colours
                        # over the index range and the lines would not match the
                        # legend printed below.
                        if n_groups > 1:
                            discrete_colorscale = [[i / (n_groups - 1), c] for i, c in enumerate(group_colors)]
                        else:
                            # A colorscale needs two stops even for a single group.
                            discrete_colorscale = [[0.0, group_colors[0]], [1.0, group_colors[0]]]
                        fig_parallel = go.Figure(data=go.Parcoords(
                            line=dict(color=df_parallel['color_val'],
                                      colorscale=discrete_colorscale,
                                      cmin=0,
                                      cmax=max(n_groups - 1, 1),
                                      showscale=False),
                            dimensions=dimensions))
                        fig_parallel.update_layout(title=f"Medie Categorie per {parallel_demo_col} (Parallel Coordinates)", template=PLOTLY_TEMPLATE)
                        st.plotly_chart(fig_parallel, use_container_width=True)
                        # Manual legend (showscale is off), laid out in at most 5 columns.
                        st.write(f"**Legenda Colori ({parallel_demo_col}):**")
                        n_legend_cols = min(n_groups, 5)
                        cols_legend = st.columns(n_legend_cols)
                        for name, num in group_map.items():
                            color = group_colors[num]
                            with cols_legend[num % n_legend_cols]:
                                # Group names come from the uploaded data: escape them
                                # because the markup is rendered with unsafe_allow_html.
                                st.markdown(f"<span style='color:{color}; font-weight:bold;'>■</span> {html.escape(str(name))}", unsafe_allow_html=True)
                    else:
                        st.warning("Nessuna dimensione valida per Parallel Coordinates.")
                else:
                    st.warning(f"Nessun dato medio aggregato per {parallel_demo_col}.")
            else:
                st.info("Nessuna colonna demografica con valori multipli per colorare le linee.")
        else:
            st.info("Seleziona almeno una categoria (dimensione).")
    else:
        st.info("Nessuna categoria disponibile per Parallel Coordinates.")
else:
    st.info("Dati insufficienti (medie categorie, demo, melted) per Parallel Coordinates.")
st.markdown("---")
# --- 4. Stacked Area Chart ---
# For one category, shows the percentage split of sentiment buckets across the
# values of a demographic column (preferably an ordered one such as age).
st.subheader("📊 Stacked Area Chart: Distribuzione Risposte per Categoria su Gruppo Ordinato")
if df_melted_f.empty or not valid_demographic_cols:
    st.info("Dati insufficienti (melted, demo) per l'Area Chart.")
else:
    # Prefer columns whose name suggests an ordering; otherwise offer them all.
    ordered_demo_options = [col for col in valid_demographic_cols if 'Eta' in col or 'Anzianita' in col] or valid_demographic_cols
    if not ordered_demo_options:
        st.info("Nessuna colonna demografica disponibile per l'Area Chart.")
    else:
        area_demo_col = st.selectbox("Seleziona Gruppo Demografico Ordinato:", ordered_demo_options, key="area_demo")
        area_cat_options = avg_scores_per_category_f.index.unique().tolist()
        if not area_cat_options:
            st.info("Nessuna categoria valida trovata.")
        else:
            area_category = st.selectbox("Seleziona Categoria:", area_cat_options, key="area_cat")
            # Rows of the chosen category that have a value in the demographic column.
            in_category = df_melted_f['Categoria'] == area_category
            has_group = df_melted_f[area_demo_col].notna()
            df_area_prep = df_melted_f[in_category & has_group].copy()
            if df_area_prep.empty:
                st.warning(f"Nessun dato trovato per la categoria '{area_category}'.")
            else:
                df_area_prep['Sentiment'] = df_area_prep['Punteggio'].apply(categorize_score)
                df_area = df_area_prep.groupby([area_demo_col, 'Sentiment'], observed=True).size().reset_index(name='Conteggio')

                def _share_within_group(counts):
                    # Percentage of each count within its demographic group.
                    total = counts.sum()
                    return counts / float(total) * 100 if total > 0 else 0

                df_area['Percentuale'] = df_area.groupby(area_demo_col)['Conteggio'].transform(_share_within_group)
                # Explicit x-axis order is only known for age columns: known bands
                # first (in the guessed order), then any other value sorted.
                group_order = None
                if 'Eta' in area_demo_col:
                    age_order_guess = ['Fino a 30 anni', '31-40 anni', '41-50 anni', 'Oltre i 50 anni', 'Non specificato']
                    actual_groups = df_area[area_demo_col].unique()
                    known_bands = [g for g in age_order_guess if g in actual_groups]
                    other_values = sorted([g for g in actual_groups if g not in age_order_guess])
                    group_order = known_bands + other_values
                category_orders = {} if group_order is None else {area_demo_col: group_order}
                # Fixed stacking order for the sentiment buckets actually present.
                sentiment_order = ["Critico", "Neutrale", "Positivo", "Non Risposto"]
                present_sentiments = df_area['Sentiment'].unique()
                category_orders['Sentiment'] = [s for s in sentiment_order if s in present_sentiments]
                plot_colors = BUCKET_COLORS.copy()
                plot_colors["Non Risposto"] = "#bbbbbb"
                if df_area.empty:
                    st.warning("Nessun dato aggregato per l'Area Chart.")
                else:
                    fig_area = px.area(
                        df_area,
                        x=area_demo_col,
                        y='Percentuale',
                        color='Sentiment',
                        title=f"Distribuzione Sentiment (%) per '{area_category}' per {area_demo_col}",
                        labels={'Percentuale': '% Rispondenti'},
                        category_orders=category_orders,
                        color_discrete_map=plot_colors,
                        template=PLOTLY_TEMPLATE,
                    )
                    fig_area.update_layout(yaxis_range=[0, 100], yaxis_ticksuffix="%")
                    st.plotly_chart(fig_area, use_container_width=True)
# --- Download Button ---
# Sidebar control that exports the currently filtered rows as a
# semicolon-separated, UTF-8 encoded CSV built in memory.
st.sidebar.divider()
st.sidebar.subheader("📥 Download Dati Filtrati")
if df_filtered is None or df_filtered.empty:
    st.sidebar.info("Nessun dato filtrato da scaricare.")
else:
    output = BytesIO()
    try:
        df_to_download = df_filtered.copy()
        df_to_download.to_csv(output, index=False, encoding='utf-8', sep=';')
        # Rewind so the download starts from the first byte of the buffer.
        output.seek(0)
        st.sidebar.download_button(
            label="Scarica Dati Filtrati Correnti (CSV)",
            data=output,
            file_name='dati_sondaggio_filtrati_avanzato.csv',
            mime='text/csv',
            key='download_csv',
        )
    except Exception as e:
        # Report export problems in the sidebar instead of breaking the page.
        st.sidebar.error(f"Errore durante la creazione del CSV: {e}")
# --- Footer ---
# Horizontal rule followed by a static caption. No timestamp is rendered, so
# none is computed here.
st.markdown("---")
st.caption("Dashboard Analisi Clima")
# Otherwise (no file uploaded): show only the page title and a prompt to upload a CSV.
else: | |
st.title("🚀 Dashboard Analisi Clima") | |
st.info("Per iniziare, carica un file CSV usando il widget qui sopra.") |