from functools import lru_cache

import pandas as pd
import streamlit as st

from src.preprocessing.clean_data import cached_clean_csv

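
# The page copy below (hero HTML, feature cards, and markdown snippets) is
# fully static, so it is built once and memoized with lru_cache rather than
# recomposed on every Streamlit rerun.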
@lru_cache(maxsize=1)
def get_static_content():
    """Cache static HTML content to avoid regeneration."""
    welcome_header = """
<div class="welcome-header" style="text-align: left; margin-bottom: 2rem;">
    <h1>Experience AI like never before</h1>
    <p class="subtitle">
        Performance, Analysis, Insights Made Simple.
    </p>
</div>
"""
    features_header = "## ✨ Key Features"
    feature_cards = [
        """
<div class="feature-card">
    <h3>📊 Data Analysis</h3>
    <ul>
        <li>Automated data cleaning</li>
        <li>Interactive visualizations</li>
        <li>Statistical insights</li>
        <li>Correlation analysis</li>
    </ul>
</div>
""",
        """
<div class="feature-card">
    <h3>🤖 Machine Learning</h3>
    <ul>
        <li>Multiple ML algorithms</li>
        <li>Automated model selection</li>
        <li>Hyperparameter tuning</li>
        <li>Performance metrics</li>
    </ul>
</div>
""",
        """
<div class="feature-card">
    <h3>💡 AI Insights</h3>
    <ul>
        <li>Data quality checks</li>
        <li>Feature importance</li>
        <li>Model explanations</li>
        <li>Smart recommendations</li>
    </ul>
</div>
""",
    ]
    getting_started = """
## 🚀 Getting Started

1. **Upload Your Dataset**: Use the sidebar to upload your CSV file
2. **Explore Data**: View statistics and visualizations in the Overview tab
3. **Train Models**: Select algorithms and tune parameters
4. **Get Insights**: Receive AI-powered recommendations
"""
    dataset_requirements = """
* File format: CSV
* Maximum size: 200MB
* Supported column types:
    * Numeric (int, float)
    * Categorical (string, boolean)
    * Temporal (date, datetime)
* Clean data preferred, but not required
"""
    example_datasets = """
Try these example datasets to explore the app:

* [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)
* [Boston Housing](https://www.kaggle.com/c/boston-housing)
* [Wine Quality](https://archive.ics.uci.edu/ml/datasets/wine+quality)
"""
    return welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets


def show_welcome_page():
    """Display welcome page with features and instructions efficiently."""
    welcome_header, features_header, feature_cards, getting_started, dataset_requirements, example_datasets = get_static_content()

    st.markdown(welcome_header, unsafe_allow_html=True)
    st.markdown(features_header, unsafe_allow_html=True)

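    # Render the three feature cards side by side; gap="medium" keeps them
    # visually separated without custom CSS.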
    col1, col2, col3 = st.columns(3, gap="medium")
    with col1:
        st.markdown(feature_cards[0], unsafe_allow_html=True)
    with col2:
        st.markdown(feature_cards[1], unsafe_allow_html=True)
    with col3:
        st.markdown(feature_cards[2], unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)

    st.markdown(getting_started, unsafe_allow_html=True)
    with st.expander("📋 Dataset Requirements"):
        st.markdown(dataset_requirements)

    with st.expander("🎯 Example Datasets"):
        st.markdown(example_datasets)

    st.markdown("### 📤 Upload Your Dataset (Currently Using Default Dataset)")
    skip_cleaning = st.checkbox("My dataset is already cleaned (skip cleaning)")
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

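    # Validate the upload before doing any expensive work: enforce the 200MB
    # limit advertised in the requirements expander, then parse the CSV.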
    if uploaded_file is not None:
        try:
            file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
            if file_details["FileSize"] > 200 * 1024 * 1024:
                st.error("❌ File size exceeds 200MB limit. Please upload a smaller file.")
                return

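            # pandas raises EmptyDataError for zero-byte files and ParserError
            # for malformed CSV; surface both as friendly messages instead of
            # a traceback.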
            try:
                df = pd.read_csv(uploaded_file)
                if df.empty:
                    st.error("❌ The uploaded file is empty. Please upload a file with data.")
                    return

                st.success("✅ Dataset uploaded successfully!")
            except pd.errors.EmptyDataError:
                st.error("❌ The uploaded file is empty. Please upload a file with data.")
                return
            except pd.errors.ParserError:
                st.error("❌ Unable to parse the CSV file. Please ensure it's properly formatted.")
                return

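            # DataFrames are not hashable, so the frame is serialized to a JSON
            # string, giving cached_clean_csv an argument it can key its cache on.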
            df_json = df.to_json(orient="records")

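            # If the cleaning step fails, fall back to the raw frame so the
            # rest of the app still has data to work with.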
            with st.spinner("🧠 AI is analyzing and cleaning the data..." if not skip_cleaning else "Processing dataset..."):
                try:
                    cleaned_df, insights = cached_clean_csv(df_json, skip_cleaning)
                except Exception as cleaning_error:
                    st.error(f"❌ Error during data cleaning: {cleaning_error}")
                    st.warning("⚠️ Using original dataset without cleaning due to errors.")
                    cleaned_df = df
                    insights = "Cleaning failed, using original data."

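            # Persist everything other pages need in session_state so it
            # survives Streamlit reruns.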
            st.session_state.df = cleaned_df
            st.session_state.insights = insights
            st.session_state.data_cleaned = True
            st.session_state.dataset_loaded = True
            st.session_state.is_user_uploaded = True
            st.session_state.original_df_json = df_json
            st.session_state.skip_cleaning = skip_cleaning

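            # Invalidate state derived from the previous dataset so downstream
            # tabs recompute it for the new upload.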
if "column_types" in st.session_state: |
|
del st.session_state.column_types |
|
if "corr_matrix" in st.session_state: |
|
del st.session_state.corr_matrix |
|
if "df_hash" in st.session_state: |
|
del st.session_state.df_hash |
|
if "test_results_calculated" in st.session_state: |
|
st.session_state.test_results_calculated = False |
|
|
|
            if skip_cleaning:
                st.success("✅ Using uploaded dataset as-is (skipped cleaning).")
            else:
                st.success("✅ Data cleaned successfully!")

        except Exception as e:
            st.error(f"❌ Error processing dataset: {e}")
            st.info("ℹ️ Please check that your file is a valid CSV and try again.")
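
# Hypothetical entry point, not part of the original module: `streamlit run`
# executes the script top to bottom with __name__ set to "__main__", so this
# renders the page when the file is run directly. In the full app the main
# script presumably imports and calls show_welcome_page() itself.
if __name__ == "__main__":
    show_welcome_page()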