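# Data Analytics Hub: a single-file Streamlit app with four pages
# (Dashboard, Data Explorer, Visualization, ML Predictions).
# Run with: streamlit run <path_to_this_file> after installing the packages imported below.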
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

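# st.set_page_config must be the first Streamlit command executed in the script.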
st.set_page_config(
    page_title="Data Analytics Hub",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

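# Custom CSS: full-width red buttons, a gradient sidebar nav, padded alerts and metric cards.
# The .css-1d391kg selector targets a hashed Streamlit class and may break across Streamlit versions.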
st.markdown("""
    <style>
    .main {
        padding-top: 2rem;
    }
    .stButton>button {
        width: 100%;
        border-radius: 5px;
        height: 3em;
        background-color: #ff4b4b;
        color: white;
        border: none;
    }
    .stButton>button:hover {
        background-color: #ff6b6b;
        color: white;
    }
    div[data-testid="stSidebarNav"] {
        background-image: linear-gradient(#f0f2f6, #e0e2e6);
        padding: 2rem 0;
        border-radius: 10px;
    }
    .css-1d391kg {
        padding: 2rem 1rem;
    }
    .stAlert {
        padding: 1rem;
        border-radius: 5px;
    }
    div[data-testid="stMetricValue"] {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 5px;
    }
    </style>
""", unsafe_allow_html=True)

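# Seed the session with a reproducible synthetic dataset (100 daily records)
# so every page has data to work with before a CSV is uploaded.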
if 'data' not in st.session_state:
    np.random.seed(42)
    dates = pd.date_range('2023-01-01', periods=100, freq='D')
    st.session_state.data = pd.DataFrame({
        'date': dates,
        'sales': np.random.normal(1000, 200, 100),
        'visitors': np.random.normal(500, 100, 100),
        'conversion_rate': np.random.uniform(0.01, 0.05, 100),
        'customer_satisfaction': np.random.normal(4.2, 0.5, 100),
        'region': np.random.choice(['North', 'South', 'East', 'West'], 100)
    })

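# Sidebar: branding plus radio navigation between the four pages.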
with st.sidebar:
    st.image("https://via.placeholder.com/150?text=Analytics+Hub", width=150)
    st.title("Analytics Hub")
    selected_page = st.radio(
        "📑 Navigation",
        ["🏠 Dashboard", "🔍 Data Explorer", "📊 Visualization", "🤖 ML Predictions"],
        key="navigation"
    )

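# Page 1: Dashboard (headline metrics and CSV upload).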
if selected_page == "🏠 Dashboard":
    st.title("📊 Data Analytics Dashboard")

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric(
            "Total Records",
            f"{len(st.session_state.data):,}",
            help="Current dataset size"
        )

    with col2:
        st.metric(
            "Avg Sales",
            f"${st.session_state.data['sales'].mean():,.2f}",
            f"{st.session_state.data['sales'].pct_change().mean()*100:.1f}%"
        )

    with col3:
        st.metric(
            "Avg Visitors",
            f"{st.session_state.data['visitors'].mean():,.0f}",
            f"{st.session_state.data['visitors'].pct_change().mean()*100:.1f}%"
        )

    with col4:
        st.metric(
            "Satisfaction",
            f"{st.session_state.data['customer_satisfaction'].mean():.2f}",
            help="Average rating"
        )

    st.markdown("### 📁 Upload Your Dataset")
    upload_col1, upload_col2 = st.columns([2, 3])

    with upload_col1:
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type="csv",
            help="Upload your CSV file to begin analysis"
        )
        if uploaded_file is not None:
            # Note: the metrics above assume the uploaded CSV has the same columns
            # as the sample data (sales, visitors, customer_satisfaction).
            try:
                st.session_state.data = pd.read_csv(uploaded_file)
                st.success("✅ Data uploaded successfully!")
            except Exception as e:
                st.error(f"❌ Error uploading file: {e}")

    with upload_col2:
        st.markdown("#### Dataset Preview")
        st.dataframe(
            st.session_state.data.head(3),
            use_container_width=True
        )

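# Page 2: Data Explorer (dataset overview, summary statistics, per-column analysis).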
elif selected_page == "🔍 Data Explorer":
    st.title("🔍 Data Explorer")

    col1, col2 = st.columns([1, 2])

    with col1:
        st.markdown("### 📊 Dataset Overview")
        st.info(f"""
- **Rows:** {st.session_state.data.shape[0]:,}
- **Columns:** {st.session_state.data.shape[1]}
- **Memory Usage:** {st.session_state.data.memory_usage().sum() / 1024**2:.2f} MB
        """)

    with col2:
        st.markdown("### 📈 Quick Stats")
        st.dataframe(
            st.session_state.data.describe(),
            use_container_width=True
        )

    st.markdown("### 🔬 Column Analysis")

    col1, col2, col3 = st.columns([1, 1, 2])

    with col1:
        column = st.selectbox(
            "Select column:",
            st.session_state.data.columns,
            help="Choose a column to analyze"
        )

    with col2:
        if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
            # analysis_type is currently informational only; the chart below always
            # shows a distribution for numeric columns and value counts otherwise.
            analysis_type = st.selectbox(
                "Analysis type:",
                ["Distribution", "Time Series"] if "date" in column.lower() else ["Distribution"],
                help="Choose type of analysis"
            )
        else:
            analysis_type = "Value Counts"

    with col3:
        if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
            stats_col1, stats_col2 = st.columns(2)
            with stats_col1:
                st.metric("Mean", f"{st.session_state.data[column].mean():.2f}")
                st.metric("Std Dev", f"{st.session_state.data[column].std():.2f}")
            with stats_col2:
                st.metric("Median", f"{st.session_state.data[column].median():.2f}")
                st.metric("IQR", f"{st.session_state.data[column].quantile(0.75) - st.session_state.data[column].quantile(0.25):.2f}")

    fig, ax = plt.subplots(figsize=(12, 6))
    if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
        sns.set_style("whitegrid")
        sns.histplot(data=st.session_state.data, x=column, kde=True, ax=ax)
        ax.set_title(f"Distribution of {column}", pad=20)
    else:
        value_counts = st.session_state.data[column].value_counts()
        sns.barplot(x=value_counts.index, y=value_counts.values, ax=ax)
        ax.set_title(f"Value Counts for {column}", pad=20)
        plt.xticks(rotation=45)

    st.pyplot(fig)

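# Page 3: Visualization (bar, line, and scatter charts plus a correlation heatmap).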
elif selected_page == "📊 Visualization":
    st.title("📊 Advanced Visualizations")

    chart_type = st.selectbox(
        "Select visualization type:",
        ["📊 Bar Chart", "📈 Line Chart", "🔵 Scatter Plot", "🌡️ Heatmap"],
        help="Choose the type of visualization you want to create"
    )

    if chart_type in ["📊 Bar Chart", "📈 Line Chart"]:
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            x_column = st.selectbox("X-axis:", st.session_state.data.columns)

        with col2:
            y_column = st.selectbox(
                "Y-axis:",
                [col for col in st.session_state.data.columns
                 if pd.api.types.is_numeric_dtype(st.session_state.data[col])]
            )

        with col3:
            color_theme = st.selectbox(
                "Color theme:",
                ["viridis", "magma", "plasma", "inferno"]
            )

        fig, ax = plt.subplots(figsize=(12, 6))
        sns.set_style("whitegrid")
        sns.set_palette(color_theme)

        # Aggregate to the mean per category when the x-axis is categorical.
        if not pd.api.types.is_numeric_dtype(st.session_state.data[x_column]):
            agg_data = st.session_state.data.groupby(x_column)[y_column].mean().reset_index()

            if "Bar" in chart_type:
                sns.barplot(x=x_column, y=y_column, data=agg_data, ax=ax)
            else:
                sns.lineplot(x=x_column, y=y_column, data=agg_data, ax=ax, marker='o')
        else:
            if "Bar" in chart_type:
                sns.barplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)
            else:
                sns.lineplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)

        plt.xticks(rotation=45)
        ax.set_title(f"{y_column} by {x_column}", pad=20)
        st.pyplot(fig)

    elif "Scatter" in chart_type:
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            x_column = st.selectbox(
                "X-axis:",
                [col for col in st.session_state.data.columns
                 if pd.api.types.is_numeric_dtype(st.session_state.data[col])]
            )

        with col2:
            y_column = st.selectbox(
                "Y-axis:",
                [col for col in st.session_state.data.columns
                 if pd.api.types.is_numeric_dtype(st.session_state.data[col]) and col != x_column]
            )

        with col3:
            hue_column = st.selectbox(
                "Color by:",
                ["None"] + list(st.session_state.data.columns)
            )

        fig, ax = plt.subplots(figsize=(12, 6))
        sns.set_style("whitegrid")

        if hue_column != "None":
            sns.scatterplot(x=x_column, y=y_column, data=st.session_state.data, hue=hue_column, ax=ax)
        else:
            sns.scatterplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)

        ax.set_title(f"{y_column} vs {x_column}", pad=20)
        st.pyplot(fig)

    elif "Heatmap" in chart_type:
        st.markdown("### 🌡️ Correlation Heatmap")

        numeric_cols = st.session_state.data.select_dtypes(include=['number']).columns.tolist()
        correlation = st.session_state.data[numeric_cols].corr()

        fig, ax = plt.subplots(figsize=(12, 8))
        # Mask the upper triangle so each pairwise correlation is shown only once.
        mask = np.triu(np.ones_like(correlation, dtype=bool))
        sns.heatmap(
            correlation,
            mask=mask,
            annot=True,
            cmap='coolwarm',
            ax=ax,
            center=0,
            square=True,
            fmt='.2f',
            linewidths=1
        )
        ax.set_title("Correlation Heatmap", pad=20)
        st.pyplot(fig)

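# Page 4: ML Predictions (train a Linear Regression or Random Forest on selected
# numeric features, then predict interactively from slider inputs).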
elif selected_page == "🤖 ML Predictions":
    st.title("🤖 Machine Learning Predictions")

    st.markdown("### ⚙️ Model Configuration")

    config_col1, config_col2 = st.columns(2)

    with config_col1:
        numeric_cols = st.session_state.data.select_dtypes(include=['number']).columns.tolist()
        target_column = st.selectbox(
            "Target variable:",
            numeric_cols,
            help="Select the variable you want to predict"
        )

    with config_col2:
        model_type = st.selectbox(
            "Model type:",
            ["📊 Linear Regression", "🌲 Random Forest"],
            help="Choose the type of model to train"
        )

    st.markdown("### 🎯 Feature Selection")
    feature_cols = [col for col in numeric_cols if col != target_column]
    selected_features = st.multiselect(
        "Select features for the model:",
        feature_cols,
        default=feature_cols,
        help="Choose the variables to use as predictors"
    )

    train_col1, train_col2 = st.columns([2, 1])

    with train_col1:
        if st.button("🚀 Train Model", use_container_width=True):
            if len(selected_features) > 0:
                with st.spinner("Training model..."):
                    X = st.session_state.data[selected_features]
                    y = st.session_state.data[target_column]

                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

                    scaler = StandardScaler()
                    X_train_scaled = scaler.fit_transform(X_train)
                    X_test_scaled = scaler.transform(X_test)

                    if "Linear" in model_type:
                        model = LinearRegression()
                    else:
                        model = RandomForestRegressor(n_estimators=100, random_state=42)

                    model.fit(X_train_scaled, y_train)

                    # Persist the fitted artifacts (and the target they were trained on)
                    # so the prediction section below survives Streamlit reruns.
                    st.session_state.model = model
                    st.session_state.scaler = scaler
                    st.session_state.features = selected_features
                    st.session_state.target = target_column

                    train_score = model.score(X_train_scaled, y_train)
                    test_score = model.score(X_test_scaled, y_test)

                    st.success("✨ Model trained successfully!")

                    metric_col1, metric_col2 = st.columns(2)
                    with metric_col1:
                        st.metric("Training R² Score", f"{train_score:.4f}")
                    with metric_col2:
                        st.metric("Testing R² Score", f"{test_score:.4f}")

                    if "Random" in model_type:
                        st.markdown("### 📊 Feature Importance")
                        importance = pd.DataFrame({
                            'Feature': selected_features,
                            'Importance': model.feature_importances_
                        }).sort_values('Importance', ascending=False)

                        fig, ax = plt.subplots(figsize=(10, 6))
                        sns.barplot(x='Importance', y='Feature', data=importance, ax=ax)
                        ax.set_title("Feature Importance")
                        st.pyplot(fig)
            else:
                st.error("⚠️ Please select at least one feature")

    st.markdown("### 🎯 Make Predictions")
    if 'model' in st.session_state:
        pred_col1, pred_col2 = st.columns([2, 1])

        with pred_col1:
            st.markdown("#### Input Features")
            input_data = {}

            # One slider per training feature, defaulting to the column mean.
            for feature in st.session_state.features:
                min_val = float(st.session_state.data[feature].min())
                max_val = float(st.session_state.data[feature].max())
                mean_val = float(st.session_state.data[feature].mean())

                input_data[feature] = st.slider(
                    f"{feature}:",
                    min_value=min_val,
                    max_value=max_val,
                    value=mean_val,
                    help=f"Range: {min_val:.2f} to {max_val:.2f}"
                )

        with pred_col2:
            if st.button("🎯 Predict", use_container_width=True):
                input_df = pd.DataFrame([input_data])
                input_scaled = st.session_state.scaler.transform(input_df)
                prediction = st.session_state.model.predict(input_scaled)[0]

                # Report against the target the model was actually trained on, which may
                # differ from the current selectbox value if it changed after training.
                st.success(f"Predicted {st.session_state.target}: {prediction:.2f}")
    else:
        st.info("ℹ️ Train a model first to make predictions")