import streamlit as st
import pandas as pd
import numpy as np
from scipy import stats


class Analyzer:
    """Streamlit front-end offering a few exploratory analyses of a DataFrame."""

    def perform_analysis(self, df):
        """Render the analysis picker and the output of the selected analysis.

        Args:
            df: pandas DataFrame to analyse.

        Returns:
            None. All results are written to the Streamlit page.
        """
        # describe() and the column pickers below cannot work without columns.
        if df is None or df.shape[1] == 0:
            st.warning("No data available for analysis.")
            return

        analysis_type = st.selectbox(
            "Select analysis type",
            [
                "Descriptive Statistics",
                "Correlation Analysis",
                "Hypothesis Testing",
                "Custom Query",
            ],
        )

        if analysis_type == "Descriptive Statistics":
            self._descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._correlation_analysis(df)
        elif analysis_type == "Hypothesis Testing":
            self._hypothesis_testing(df)
        elif analysis_type == "Custom Query":
            self._custom_query(df)

    def _descriptive_statistics(self, df):
        """Show describe() output and, on request, skewness and kurtosis."""
        st.write(df.describe())
        if st.checkbox("Show additional statistics"):
            # numeric_only=True: recent pandas raises on text columns otherwise.
            st.write("Skewness:")
            st.write(df.skew(numeric_only=True))
            st.write("Kurtosis:")
            st.write(df.kurtosis(numeric_only=True))

    def _correlation_analysis(self, df):
        """Show the correlation matrix of the numeric columns, optionally as a heatmap."""
        # Correlation is only defined for numeric data; selecting the columns
        # explicitly avoids the error recent pandas raises on mixed frames.
        numeric_df = df.select_dtypes(include=["number", "bool"])
        if numeric_df.shape[1] == 0:
            st.warning("Correlation analysis requires at least one numeric column.")
            return

        corr_matrix = numeric_df.corr()
        st.write(corr_matrix)

        if st.checkbox("Show heatmap"):
            # Imported here because the module never imported plotly; the
            # guard keeps the rest of the page usable when it is not installed.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to display the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
            st.plotly_chart(fig)

    def _hypothesis_testing(self, df):
        """Let the user pick a statistical test and dispatch to it."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])
        if test_type == "T-Test":
            self._t_test(df)
        elif test_type == "ANOVA":
            self._anova(df)
        elif test_type == "Chi-Square":
            self._chi_square(df)

    def _t_test(self, df):
        """Run an independent two-sample t-test on two user-selected columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox("Select second column", df.columns)

        # Missing values would otherwise propagate and turn both results into NaN.
        sample1 = df[col1].dropna()
        sample2 = df[col2].dropna()
        try:
            t_stat, p_value = stats.ttest_ind(sample1, sample2)
        except (TypeError, ValueError) as exc:
            # Typically a non-numeric column was selected.
            st.error(f"Could not run T-Test: {exc}")
            return

        st.write(f"T-statistic: {t_stat}")
        st.write(f"P-value: {p_value}")

    def _anova(self, df):
        """Run a one-way ANOVA of a value column split by a grouping column."""
        grouping_col = st.selectbox("Select grouping column", df.columns)
        value_col = st.selectbox("Select value column", df.columns)

        # Drop missing values per group and discard groups left empty.
        groups = [
            values.dropna()
            for _, values in df.groupby(grouping_col)[value_col]
        ]
        groups = [values for values in groups if len(values) > 0]

        # f_oneway needs at least two samples to compare.
        if len(groups) < 2:
            st.warning("ANOVA requires at least two non-empty groups.")
            return

        try:
            f_stat, p_value = stats.f_oneway(*groups)
        except (TypeError, ValueError) as exc:
            # Typically a non-numeric value column was selected.
            st.error(f"Could not run ANOVA: {exc}")
            return

        st.write(f"F-statistic: {f_stat}")
        st.write(f"P-value: {p_value}")

    def _chi_square(self, df):
        """Run a chi-square test of independence on two user-selected columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox("Select second column", df.columns)

        contingency_table = pd.crosstab(df[col1], df[col2])
        if contingency_table.size == 0:
            st.warning("Chi-Square requires a non-empty contingency table.")
            return

        try:
            chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
        except ValueError as exc:
            # Raised, for example, when an expected frequency is zero.
            st.error(f"Could not run Chi-Square test: {exc}")
            return

        st.write(f"Chi-square statistic: {chi2}")
        st.write(f"P-value: {p_value}")

    def _custom_query(self, df):
        """Filter the DataFrame with a user-supplied pandas query expression."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        if not query:
            return

        # NOTE(security): DataFrame.query evaluates the expression typed by the
        # user. Only expose this to trusted users, or validate the expression
        # before evaluating it.
        try:
            result = df.query(query)
        except Exception as e:
            # Deliberately broad: this is the UI boundary, and any parsing or
            # evaluation failure should be shown to the user, not crash the app.
            st.error(f"Error in query: {str(e)}")
        else:
            st.write(result)