Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 7, 2025

Commit

1ce2ad9

verified ·

1 Parent(s): 23e4994

Update app.py

Browse files

Files changed (1) hide show

app.py +415 -464

app.py CHANGED Viewed

@@ -8,63 +8,65 @@ from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorizati
 from NoCodeTextClassifier.models import Models
 import os
 import pickle
-import io
-import base64
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
-from sklearn.preprocessing import LabelEncoder
-# Configure page
-st.set_page_config(page_title="Text Classifier", page_icon="📝", layout="wide")
-# Utility functions
-def safe_read_csv(uploaded_file, encoding_options=['utf-8', 'latin1', 'iso-8859-1', 'cp1252']):
-    """Safely read CSV with multiple encoding attempts"""
-    if uploaded_file is None:
-        return None
-    # Reset file pointer
-    uploaded_file.seek(0)
-    for encoding in encoding_options:
-        try:
-            # Read the file content as bytes
-            bytes_data = uploaded_file.read()
-            # Convert bytes to string with the current encoding
-            string_data = bytes_data.decode(encoding)
-            # Use StringIO to create a file-like object
-            df = pd.read_csv(io.StringIO(string_data))
-            st.success(f"File loaded successfully with {encoding} encoding")
-            return df
-        except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
-            st.warning(f"Failed to read with {encoding} encoding: {str(e)}")
-            continue
-        except Exception as e:
-            st.error(f"Unexpected error with {encoding} encoding: {str(e)}")
-            continue
-    st.error("Failed to read the file with any supported encoding")
-    return None
-def create_sample_data():
-    """Create sample data for testing"""
-    sample_data = {
-        'text': [
-            "I love this product, it's amazing!",
-            "This is the worst thing I've ever bought",
-            "Great quality and fast delivery",
-            "Terrible customer service, very disappointed",
-            "Excellent value for money",
-            "Poor quality, broke after one day",
-            "Highly recommend this to everyone",
-            "Waste of money, don't buy this"
-        ],
-        'sentiment': ['positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative']
-    }
-    return pd.DataFrame(sample_data)
 def save_artifacts(obj, folder_name, file_name):
     """Save artifacts like encoders and vectorizers"""
     try:
@@ -82,7 +84,7 @@ def load_artifacts(folder_name, file_name):
         with open(os.path.join(folder_name, file_name), 'rb') as f:
             return pickle.load(f)
     except FileNotFoundError:
-        st.error(f"File {file_name} not found in {folder_name} folder")
         return None
     except Exception as e:
         st.error(f"Error loading {file_name}: {str(e)}")
@@ -100,6 +102,29 @@ def load_model(model_name):
         st.error(f"Error loading model: {str(e)}")
         return None
 def predict_text(model_name, text, vectorizer_type="tfidf"):
     """Make prediction on new text"""
     try:
@@ -146,293 +171,210 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         st.error(f"Error during prediction: {str(e)}")
         return None, None
-def download_sample_csv():
-    """Generate sample CSV for download"""
-    sample_df = create_sample_data()
-    csv = sample_df.to_csv(index=False)
-    b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="sample_data.csv">Download Sample CSV</a>'
-    return href
-# Main App
-st.title('📝 No Code Text Classification App')
-st.markdown('---')
-st.write('Understand the behavior of your text data and train a model to classify the text data')
-# Initialize session state
-if 'vectorizer_type' not in st.session_state:
-    st.session_state.vectorizer_type = "tfidf"
-if 'train_df' not in st.session_state:
-    st.session_state.train_df = None
-# Sidebar
-st.sidebar.title("Navigation")
-section = st.sidebar.radio("Choose Section", ["📊 Data Analysis", "🔧 Train Model", "🎯 Predictions"])
-# Data Upload Section
-st.sidebar.markdown("---")
-st.sidebar.subheader("📁 Data Upload")
-# Option to use sample data
-if st.sidebar.button("Use Sample Data"):
-    st.session_state.train_df = create_sample_data()
-    st.sidebar.success("Sample data loaded!")
-# Sample data download
-st.sidebar.markdown("**Download Sample Data:**")
-st.sidebar.markdown(download_sample_csv(), unsafe_allow_html=True)
-st.sidebar.markdown("**Or upload your own data:**")
-# File upload with better error handling
-train_data = st.sidebar.file_uploader(
-    "Upload training data",
-    type=["csv"],
-    help="Upload a CSV file with text and target columns"
-)
-test_data = st.sidebar.file_uploader(
-    "Upload test data (optional)",
-    type=["csv"],
-    help="Optional: Upload separate test data"
-)
-# Alternative text input method
-st.sidebar.markdown("**Or paste CSV data:**")
-if st.sidebar.checkbox("Enter data manually"):
-    csv_text = st.sidebar.text_area(
-        "Paste CSV data here:",
-        height=100,
-        placeholder="text,sentiment\n\"Great product!\",positive\n\"Poor quality\",negative"
     )
-    if csv_text and st.sidebar.button("Load from text"):
-        try:
-            train_df = pd.read_csv(io.StringIO(csv_text))
-            st.session_state.train_df = train_df
-            st.sidebar.success("Data loaded from text!")
-        except Exception as e:
-            st.sidebar.error(f"Error parsing CSV text: {str(e)}")
-# Load data
-train_df = None
-test_df = None
-# Try to load from uploaded file first
-if train_data is not None:
-    train_df = safe_read_csv(train_data)
-    if train_df is not None:
-        st.session_state.train_df = train_df
-# Use session state data if available
-if st.session_state.train_df is not None:
-    train_df = st.session_state.train_df
-if test_data is not None:
-    test_df = safe_read_csv(test_data)
-# Process data if available
-if train_df is not None:
-    try:
-        st.sidebar.success("✅ Training data loaded successfully!")
-        # Show data info in sidebar
-        st.sidebar.write(f"**Rows:** {len(train_df)}")
-        st.sidebar.write(f"**Columns:** {len(train_df.columns)}")
-        with st.expander("📋 Data Preview", expanded=False):
-            st.write("**Training Data Preview:**")
-            st.dataframe(train_df.head())
-        columns = train_df.columns.tolist()
-        # Column selection with validation
-        if len(columns) >= 2:
-            text_data = st.sidebar.selectbox("Choose the text column:", columns, index=0)
-            # Default to second column for target, or first if same as text
-            target_default = 1 if len(columns) > 1 and columns[1] != text_data else 0
-            target = st.sidebar.selectbox("Choose the target column:", columns, index=target_default)
-            if text_data == target:
-                st.sidebar.error("Text and target columns must be different!")
-                st.stop()
-        else:
-            st.sidebar.error("Data must have at least 2 columns (text and target)")
-            st.stop()
-        # Process data
-        try:
-            info = Informations(train_df, text_data, target)
-            train_df['clean_text'] = info.clean_text()
-            train_df['text_length'] = info.text_length()
-            # Handle label encoding
-            label_encoder = LabelEncoder()
-            train_df['target'] = label_encoder.fit_transform(train_df[target])
-            # Save label encoder
-            save_artifacts(label_encoder, "artifacts", "encoder.pkl")
-        except Exception as e:
-            st.error(f"Error processing data: {str(e)}")
-            st.stop()
-    except Exception as e:
-        st.error(f"Error loading data: {str(e)}")
-        train_df = None
-# Main Content Based on Section
-if section == "📊 Data Analysis":
-    if train_df is not None:
-        try:
-            st.header("📊 Data Analysis & Insights")
-            # Create columns for metrics
-            col1, col2, col3, col4 = st.columns(4)
-            with col1:
-                st.metric("Total Samples", info.shape()[0])
-            with col2:
-                st.metric("Features", info.shape()[1])
-            with col3:
-                st.metric("Classes", len(train_df[target].unique()))
-            with col4:
-                missing_pct = (info.missing_values().sum() / len(train_df)) * 100
-                st.metric("Missing Data %", f"{missing_pct:.1f}%")
-            st.markdown("---")
-            # Class distribution
-            col1, col2 = st.columns(2)
-            with col1:
-                st.subheader("Class Distribution")
-                class_dist = train_df[target].value_counts()
-                st.bar_chart(class_dist)
-                # Check for imbalance
-                imbalance_ratio = class_dist.max() / class_dist.min()
-                if imbalance_ratio > 2:
-                    st.warning(f"⚠️ Class imbalance detected (ratio: {imbalance_ratio:.1f}:1)")
                 else:
-                    st.success("✅ Classes are relatively balanced")
-            with col2:
-                st.subheader("Text Length Distribution")
-                fig, ax = plt.subplots(figsize=(8, 6))
-                ax.hist(train_df['text_length'], bins=30, alpha=0.7, color='skyblue')
-                ax.set_xlabel('Text Length (characters)')
-                ax.set_ylabel('Frequency')
-                ax.set_title('Distribution of Text Lengths')
-                st.pyplot(fig)
-            # Detailed analysis
-            with st.expander("📈 Detailed Analysis", expanded=False):
-                st.write("**Class Imbalance Analysis:**")
-                st.write(info.class_imbalanced())
-                st.write("**Missing Values:**")
-                st.write(info.missing_values())
-                st.write("**Text Length Statistics:**")
-                st.write(info.analysis_text_length('text_length'))
-                # Correlation
-                correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
-                st.write(f"**Correlation between Text Length and Target:** {correlation:.4f}")
-                if abs(correlation) > 0.3:
-                    st.info(f"📊 Moderate correlation detected ({correlation:.3f})")
-                elif abs(correlation) > 0.1:
-                    st.info(f"📊 Weak correlation detected ({correlation:.3f})")
-                else:
-                    st.info("📊 No significant correlation between text length and target")
-        except Exception as e:
-            st.error(f"Error in data analysis: {str(e)}")
-    else:
-        st.warning("📤 Please upload training data or use sample data to get insights")
-        # Show instructions
-        st.info("""
-        **To get started:**
-        1. Click "Use Sample Data" in the sidebar, OR
-        2. Upload your own CSV file with text and target columns, OR
-        3. Use the manual text input option in the sidebar
-        """)
-# Train Model Section
-elif section == "🔧 Train Model":
-    if train_df is not None:
-        try:
-            st.header("🔧 Train Classification Model")
-            # Model and vectorizer selection
-            col1, col2 = st.columns(2)
-            with col1:
-                st.subheader("Choose Model")
-                model = st.selectbox("Select Algorithm:", [
-                    "Logistic Regression", "Decision Tree",
-                    "Random Forest", "Linear SVC", "SVC",
-                    "Multinomial Naive Bayes", "Gaussian Naive Bayes"
-                ], help="Different algorithms have different strengths")
-            with col2:
-                st.subheader("Choose Vectorizer")
-                vectorizer_choice = st.selectbox("Select Vectorization Method:",
-                    ["Tfidf Vectorizer", "Count Vectorizer"],
-                    help="TF-IDF is usually better for text classification")
-            # Initialize vectorizer
-            if vectorizer_choice == "Tfidf Vectorizer":
-                vectorizer = TfidfVectorizer(max_features=10000, stop_words='english')
-                st.session_state.vectorizer_type = "tfidf"
-            else:
-                vectorizer = CountVectorizer(max_features=10000, stop_words='english')
-                st.session_state.vectorizer_type = "count"
-            # Show processed data preview
-            with st.expander("🔍 Processed Data Preview", expanded=False):
-                preview_df = train_df[['clean_text', 'target']].head(10)
-                st.dataframe(preview_df)
-            st.markdown("---")
-            # Training section
-            if st.button("🚀 Start Training", type="primary"):
-                with st.spinner("Training model... This may take a few moments."):
-                    try:
-                        # Progress bar
-                        progress_bar = st.progress(0)
-                        status_text = st.empty()
-                        status_text.text("Vectorizing text data...")
                         progress_bar.progress(20)
-                        # Vectorize text data
-                        X = vectorizer.fit_transform(train_df['clean_text'])
-                        y = train_df['target']
-                        status_text.text("Splitting data...")
-                        progress_bar.progress(40)
-                        # Split data
-                        X_train, X_test, y_train, y_test = process.split_data(X, y)
-                        status_text.text("Saving vectorizer...")
                         progress_bar.progress(50)
-                        # Save vectorizer
-                        vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
-                        save_artifacts(vectorizer, "artifacts", vectorizer_filename)
-                        status_text.text(f"Training {model}...")
-                        progress_bar.progress(70)
-                        # Train model
-                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
                         if model == "Logistic Regression":
                             models.LogisticRegression()
                         elif model == "Decision Tree":
@@ -450,162 +392,171 @@ elif section == "🔧 Train Model":
                         progress_bar.progress(100)
                         status_text.text("Training completed!")
-                        st.success("🎉 Model training completed successfully!")
-                        st.balloons()
-                        # Show training info
-                        st.info(f"""
-                        **Training Summary:**
-                        - Model: {model}
-                        - Vectorizer: {vectorizer_choice}
-                        - Training samples: {X_train.shape[0]}
-                        - Test samples: {X_test.shape[0]}
-                        - Features: {X_train.shape[1]}
-                        """)
-                    except Exception as e:
-                        st.error(f"Training failed: {str(e)}")
-        except Exception as e:
-            st.error(f"Error in model training setup: {str(e)}")
-    else:
-        st.warning("📤 Please upload training data to train a model")
-# Predictions Section
-elif section == "🎯 Predictions":
-    st.header("🎯 Make Predictions")
-    # Check if models exist
-    if os.path.exists("models") and os.listdir("models"):
-        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-        if available_models:
-            # Single prediction
-            st.subheader("Single Text Prediction")
-            col1, col2 = st.columns([3, 1])
-            with col1:
-                text_input = st.text_area(
-                    "Enter text to classify:",
-                    height=100,
-                    placeholder="Type or paste your text here..."
-                )
-            with col2:
-                selected_model = st.selectbox("Choose model:", available_models)
-                predict_btn = st.button("🎯 Predict", type="primary")
-            if predict_btn and text_input.strip():
-                with st.spinner("Making prediction..."):
-                    predicted_label, prediction_proba = predict_text(
-                        selected_model,
-                        text_input,
-                        st.session_state.get('vectorizer_type', 'tfidf')
-                    )
-                    if predicted_label is not None:
-                        st.success("Prediction completed!")
-                        # Results in columns
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            st.markdown("### 📝 Input Text")
-                            st.text_area("", value=text_input, height=100, disabled=True)
-                        with col2:
-                            st.markdown("### 🎯 Prediction Result")
-                            st.markdown(f"**Predicted Class:** `{predicted_label}`")
-                            # Show probabilities if available
-                            if prediction_proba is not None:
-                                encoder = load_artifacts("artifacts", "encoder.pkl")
-                                if encoder is not None:
-                                    classes = encoder.classes_
-                                    prob_df = pd.DataFrame({
-                                        'Class': classes,
-                                        'Probability': prediction_proba
-                                    }).sort_values('Probability', ascending=False)
-                                    st.markdown("**Confidence Scores:**")
-                                    # Show as progress bars
-                                    for _, row in prob_df.iterrows():
-                                        st.write(f"{row['Class']}: {row['Probability']:.3f}")
-                                        st.progress(row['Probability'])
-            elif predict_btn and not text_input.strip():
-                st.warning("Please enter some text to classify")
-            st.markdown("---")
-            # Batch prediction
-            st.subheader("Batch Predictions")
-            uploaded_file = st.file_uploader(
-                "Upload CSV file for batch predictions",
-                type=['csv'],
-                help="Upload a CSV with a text column to classify multiple texts at once"
-            )
-            if uploaded_file is not None:
-                batch_df = safe_read_csv(uploaded_file)
-                if batch_df is not None:
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        text_column = st.selectbox("Select text column:", batch_df.columns.tolist())
-                    with col2:
-                        batch_model = st.selectbox("Choose model:", available_models, key="batch_model")
-                    st.write("**Data Preview:**")
-                    st.dataframe(batch_df.head())
-                    if st.button("🚀 Run Batch Predictions"):
-                        with st.spinner("Processing batch predictions..."):
-                            predictions = []
-                            # Progress tracking
-                            progress_bar = st.progress(0)
-                            total_texts = len(batch_df)
-                            for i, text in enumerate(batch_df[text_column]):
-                                pred, _ = predict_text(
-                                    batch_model,
-                                    str(text),
-                                    st.session_state.get('vectorizer_type', 'tfidf')
-                                )
-                                predictions.append(pred if pred is not None else "Error")
-                                progress_bar.progress((i + 1) / total_texts)
-                            batch_df['Predicted_Class'] = predictions
-                            st.success("✅ Batch predictions completed!")
-                            # Results
-                            st.write("**Results:**")
-                            st.dataframe(batch_df[[text_column, 'Predicted_Class']])
-                            # Download button
-                            csv = batch_df.to_csv(index=False)
-                            st.download_button(
-                                label="⬇️ Download Results",
-                                data=csv,
-                                file_name="batch_predictions.csv",
-                                mime="text/csv"
-                            )
-                            # Show prediction distribution
-                            pred_dist = batch_df['Predicted_Class'].value_counts()
-                            st.bar_chart(pred_dist)
         else:
-            st.warning("No trained models found.")
-    else:
-        st.warning("🔧 No models available. Please train a model first in the 'Train Model' section.")
-# Footer
-st.markdown("---")
-st.markdown("*Built with Streamlit • No-Code Text Classification*")

 from NoCodeTextClassifier.models import Models
 import os
 import pickle
+import hashlib
+import hmac
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+# Authentication Configuration
+# Maps each accepted username to the password that check_password() compares
+# the submitted password against.
+# NOTE(review): these are plaintext credentials committed to source; anyone who
+# can read the repository can log in. Consider loading them from deployment
+# secrets and storing only salted hashes - confirm with the app owner.
+USERS = {
+    "admin": "admin123",
+    "user1": "password123",
+    "demo": "demo123"
+}
+def check_password():
+    """Returns True if the user has correct password."""
+    def password_entered():
+        """Checks whether a password entered by the user is correct."""
+        username = st.session_state["username"]
+        password = st.session_state["password"]
+        if username in USERS and hmac.compare_digest(USERS[username], password):
+            st.session_state["password_correct"] = True
+            st.session_state["authenticated_user"] = username
+            del st.session_state["password"]  # Don't store passwords
+        else:
+            st.session_state["password_correct"] = False
+    # Return True if password is validated
+    if st.session_state.get("password_correct", False):
+        return True
+    # Show login form
+    st.markdown("## 🔐 Login Required")
+    st.markdown("Please enter your credentials to access the Text Classification App")
+    col1, col2, col3 = st.columns([1, 2, 1])
+    with col2:
+        st.text_input("Username", key="username", placeholder="Enter username")
+        st.text_input("Password", type="password", key="password", placeholder="Enter password")
+        if st.button("Login", use_container_width=True):
+            password_entered()
+        # Show demo credentials
+        with st.expander("Demo Credentials"):
+            st.info("""
+            **Demo Account:**
+            - Username: `demo`
+            - Password: `demo123`
+            **Admin Account:**
+            - Username: `admin`
+            - Password: `admin123`
+            """)
+    if st.session_state.get("password_correct", False) == False:
+        st.error("😞 Username or password incorrect")
+    return False
+# Utility functions
 def save_artifacts(obj, folder_name, file_name):
     """Save artifacts like encoders and vectorizers"""
     try:
         with open(os.path.join(folder_name, file_name), 'rb') as f:
             return pickle.load(f)
     except FileNotFoundError:
+        st.warning(f"File {file_name} not found in {folder_name} folder")
         return None
     except Exception as e:
         st.error(f"Error loading {file_name}: {str(e)}")
         st.error(f"Error loading model: {str(e)}")
         return None
+def safe_file_upload(uploaded_file, encoding='utf-8'):
+    """Safely read uploaded file with multiple encoding attempts"""
+    if uploaded_file is None:
+        return None
+    encodings_to_try = [encoding, 'latin1', 'cp1252', 'iso-8859-1']
+    for enc in encodings_to_try:
+        try:
+            # Reset file pointer
+            uploaded_file.seek(0)
+            df = pd.read_csv(uploaded_file, encoding=enc)
+            st.success(f"File loaded successfully with {enc} encoding")
+            return df
+        except UnicodeDecodeError:
+            continue
+        except Exception as e:
+            st.error(f"Error reading file with {enc}: {str(e)}")
+            continue
+    st.error("Could not read file with any common encoding. Please check your file format.")
+    return None
 def predict_text(model_name, text, vectorizer_type="tfidf"):
     """Make prediction on new text"""
     try:
         st.error(f"Error during prediction: {str(e)}")
         return None, None
+# Main App Logic
+def main_app():
+    # Header with user info
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        st.title('🤖 No Code Text Classification App')
+        st.write('Understand the behavior of your text data and train a model to classify the text data')
+    with col2:
+        st.markdown(f"**👤 User:** {st.session_state.get('authenticated_user', 'Unknown')}")
+        if st.button("Logout", type="secondary"):
+            for key in list(st.session_state.keys()):
+                del st.session_state[key]
+            st.rerun()
+    # Sidebar
+    section = st.sidebar.radio("Choose Section", ["📊 Data Analysis", "🚀 Train Model", "🔮 Predictions"])
+    # Upload Data with improved error handling
+    st.sidebar.subheader("📁 Upload Your Dataset")
+    # File encoding selection
+    encoding_choice = st.sidebar.selectbox(
+        "File Encoding",
+        ["utf-8", "latin1", "cp1252", "iso-8859-1"],
+        help="If file upload fails, try different encodings"
     )
+    train_data = st.sidebar.file_uploader(
+        "Upload training data",
+        type=["csv"],
+        help="Upload a CSV file with your training data"
+    )
+    test_data = st.sidebar.file_uploader(
+        "Upload test data (optional)",
+        type=["csv"],
+        help="Optional: Upload separate test data"
+    )
+    # Global variables to store data and settings
+    if 'vectorizer_type' not in st.session_state:
+        st.session_state.vectorizer_type = "tfidf"
+    train_df = None
+    test_df = None
+    info = None
+    if train_data is not None:
+        with st.spinner("Loading training data..."):
+            train_df = safe_file_upload(train_data, encoding_choice)
+        if train_df is not None:
+            try:
+                if test_data is not None:
+                    test_df = safe_file_upload(test_data, encoding_choice)
+                st.sidebar.success(f"✅ Training data loaded: {train_df.shape[0]} rows, {train_df.shape[1]} columns")
+                st.write("📋 Training Data Preview:")
+                st.dataframe(train_df.head(3), use_container_width=True)
+                columns = train_df.columns.tolist()
+                text_data = st.sidebar.selectbox("📝 Choose the text column:", columns)
+                target = st.sidebar.selectbox("🎯 Choose the target column:", columns)
+                # Process data
+                if text_data and target and text_data != target:
+                    with st.spinner("Processing data..."):
+                        info = Informations(train_df, text_data, target)
+                        train_df['clean_text'] = info.clean_text()
+                        train_df['text_length'] = info.text_length()
+                        # Handle label encoding manually if the class doesn't store encoder
+                        from sklearn.preprocessing import LabelEncoder
+                        label_encoder = LabelEncoder()
+                        train_df['target'] = label_encoder.fit_transform(train_df[target])
+                        # Save label encoder for later use
+                        if save_artifacts(label_encoder, "artifacts", "encoder.pkl"):
+                            st.sidebar.success("✅ Data processed successfully")
                 else:
+                    st.sidebar.warning("Please select different columns for text and target")
+            except Exception as e:
+                st.error(f"❌ Error processing data: {str(e)}")
+                train_df = None
+                info = None
+    # Data Analysis Section
+    if section == "📊 Data Analysis":
+        st.header("📊 Data Analysis & Insights")
+        if train_data is not None and train_df is not None and info is not None:
+            try:
+                # Create tabs for better organization
+                tab1, tab2, tab3 = st.tabs(["📈 Basic Stats", "📝 Text Analysis", "📊 Visualizations"])
+                with tab1:
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        st.metric("📊 Data Shape", f"{info.shape()[0]} x {info.shape()[1]}")
+                    with col2:
+                        imbalance_info = info.class_imbalanced()
+                        st.metric("⚖️ Class Balance", "Balanced" if not imbalance_info else "Imbalanced")
+                    with col3:
+                        missing_info = info.missing_values()
+                        total_missing = sum(missing_info.values()) if isinstance(missing_info, dict) else 0
+                        st.metric("❌ Missing Values", str(total_missing))
+                    st.subheader("📋 Processed Data Preview")
+                    st.dataframe(train_df[['clean_text', 'text_length', 'target']].head(), use_container_width=True)
+                with tab2:
+                    st.subheader("📏 Text Length Analysis")
+                    text_analysis = info.analysis_text_length('text_length')
+                    # Display stats in a nice format
+                    stats_col1, stats_col2 = st.columns(2)
+                    with stats_col1:
+                        st.json(text_analysis)
+                    with stats_col2:
+                        correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
+                        st.metric("🔗 Text Length-Target Correlation", f"{correlation:.4f}")
+                with tab3:
+                    st.subheader("📊 Data Visualizations")
+                    vis = Visualizations(train_df, text_data, target)
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        st.write("**Class Distribution**")
+                        vis.class_distribution()
+                    with col2:
+                        st.write("**Text Length Distribution**")
+                        vis.text_length_distribution()
+            except Exception as e:
+                st.error(f"❌ Error in data analysis: {str(e)}")
+        else:
+            st.info("👆 Please upload training data in the sidebar to get insights")
+    # Train Model Section
+    elif section == "🚀 Train Model":
+        st.header("🚀 Train Classification Model")
+        if train_data is not None and train_df is not None:
+            try:
+                # Create two columns for model selection
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.subheader("🤖 Choose Model")
+                    model = st.radio("Select Algorithm:", [
+                        "Logistic Regression", "Decision Tree",
+                        "Random Forest", "Linear SVC", "SVC",
+                        "Multinomial Naive Bayes", "Gaussian Naive Bayes"
+                    ])
+                with col2:
+                    st.subheader("🔤 Choose Vectorizer")
+                    vectorizer_choice = st.radio("Select Vectorizer:", ["Tfidf Vectorizer", "Count Vectorizer"])
+                # Initialize vectorizer
+                if vectorizer_choice == "Tfidf Vectorizer":
+                    vectorizer = TfidfVectorizer(max_features=10000)
+                    st.session_state.vectorizer_type = "tfidf"
+                else:
+                    vectorizer = CountVectorizer(max_features=10000)
+                    st.session_state.vectorizer_type = "count"
+                st.subheader("📋 Training Data Preview")
+                st.dataframe(train_df[['clean_text', 'target']].head(3), use_container_width=True)
+                # Vectorize text data
+                with st.spinner("Preparing data..."):
+                    X = vectorizer.fit_transform(train_df['clean_text'])
+                    y = train_df['target']
+                    # Split data
+                    X_train, X_test, y_train, y_test = process.split_data(X, y)
+                    st.success(f"✅ Data prepared - Train: {X_train.shape}, Test: {X_test.shape}")
+                    # Save vectorizer for later use
+                    vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
+                    save_artifacts(vectorizer, "artifacts", vectorizer_filename)
+                if st.button("🚀 Start Training", type="primary", use_container_width=True):
+                    progress_bar = st.progress(0)
+                    status_text = st.empty()
+                    with st.spinner(f"Training {model} model..."):
+                        status_text.text("Initializing model...")
                         progress_bar.progress(20)
+                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
+                        status_text.text("Training in progress...")
                         progress_bar.progress(50)
+                        # Train selected model
                         if model == "Logistic Regression":
                             models.LogisticRegression()
                         elif model == "Decision Tree":
                         progress_bar.progress(100)
                         status_text.text("Training completed!")
+                    st.success("🎉 Model training completed successfully!")
+                    st.balloons()
+                    st.info("💡 You can now use the 'Predictions' section to classify new text.")
+            except Exception as e:
+                st.error(f"❌ Error in model training: {str(e)}")
+                st.exception(e)
+        else:
+            st.info("👆 Please upload training data in the sidebar to train a model")
+    # Predictions Section
+    elif section == "🔮 Predictions":
+        st.header("🔮 Text Classification Predictions")
+        # Check if models exist
+        if os.path.exists("models") and os.listdir("models"):
+            tab1, tab2 = st.tabs(["🎯 Single Prediction", "📊 Batch Predictions"])
+            with tab1:
+                st.subheader("🎯 Classify Single Text")
+                # Text input for prediction
+                text_input = st.text_area("Enter the text to classify:", height=100, placeholder="Type or paste your text here...")
+                # Model selection
+                available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+                if available_models:
+                    selected_model = st.selectbox("🤖 Choose the trained model:", available_models)
+                    # Prediction button
+                    if st.button("🔮 Predict", key="single_predict", type="primary"):
+                        if text_input.strip():
+                            with st.spinner("Making prediction..."):
+                                predicted_label, prediction_proba = predict_text(
+                                    selected_model,
+                                    text_input,
+                                    st.session_state.get('vectorizer_type', 'tfidf')
+                                )
+                                if predicted_label is not None:
+                                    st.success("🎉 Prediction completed!")
+                                    # Display results
+                                    st.markdown("### 📋 Prediction Results")
+                                    # Create result container
+                                    result_container = st.container()
+                                    with result_container:
+                                        st.markdown(f"**📝 Input Text:** {text_input}")
+                                        st.markdown(f"**🏷️ Predicted Class:** `{predicted_label}`")
+                                        # Display probabilities if available
+                                        if prediction_proba is not None:
+                                            st.markdown("**📊 Class Probabilities:**")
+                                            # Load encoder to get class names
+                                            encoder = load_artifacts("artifacts", "encoder.pkl")
+                                            if encoder is not None:
+                                                classes = encoder.classes_
+                                                prob_df = pd.DataFrame({
+                                                    'Class': classes,
+                                                    'Probability': prediction_proba
+                                                }).sort_values('Probability', ascending=False)
+                                                st.bar_chart(prob_df.set_index('Class'))
+                                                st.dataframe(prob_df, use_container_width=True)
+                        else:
+                            st.warning("⚠️ Please enter some text to classify")
+                else:
+                    st.warning("⚠️ No trained models found. Please train a model first.")
+            with tab2:
+                st.subheader("📊 Batch Classification")
+                uploaded_file = st.file_uploader(
+                    "Upload a CSV file with text to classify",
+                    type=['csv'],
+                    help="Upload a CSV file containing text data for batch classification"
+                )
+                if uploaded_file is not None:
+                    try:
+                        batch_df = safe_file_upload(uploaded_file)
+                        if batch_df is not None:
+                            st.write("📋 Uploaded data preview:")
+                            st.dataframe(batch_df.head(), use_container_width=True)
+                            # Select text column
+                            text_column = st.selectbox("📝 Select the text column:", batch_df.columns.tolist())
+                            available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+                            batch_model = st.selectbox("🤖 Choose model for batch prediction:", available_models, key="batch_model")
+                            if st.button("🚀 Run Batch Predictions", key="batch_predict", type="primary"):
+                                progress_bar = st.progress(0)
+                                status_text = st.empty()
+                                with st.spinner("Processing batch predictions..."):
+                                    predictions = []
+                                    total_texts = len(batch_df)
+                                    for i, text in enumerate(batch_df[text_column]):
+                                        status_text.text(f"Processing {i+1}/{total_texts} texts...")
+                                        progress_bar.progress((i+1)/total_texts)
+                                        pred, _ = predict_text(
+                                            batch_model,
+                                            str(text),
+                                            st.session_state.get('vectorizer_type', 'tfidf')
+                                        )
+                                        predictions.append(pred if pred is not None else "Error")
+                                    batch_df['Predicted_Class'] = predictions
+                                    st.success("🎉 Batch predictions completed!")
+                                    st.write("📊 Results:")
+                                    st.dataframe(batch_df[[text_column, 'Predicted_Class']], use_container_width=True)
+                                    # Download results
+                                    csv = batch_df.to_csv(index=False)
+                                    st.download_button(
+                                        label="📥 Download predictions as CSV",
+                                        data=csv,
+                                        file_name="batch_predictions.csv",
+                                        mime="text/csv",
+                                        type="primary"
+                                    )
+                    except Exception as e:
+                        st.error(f"❌ Error in batch prediction: {str(e)}")
         else:
+            st.info("⚠️ No trained models found. Please go to 'Train Model' section to train a model first.")
+# Main execution
+def main():
+    # Page config
+    st.set_page_config(
+        page_title="Text Classification App",
+        page_icon="🤖",
+        layout="wide",
+        initial_sidebar_state="expanded"
+    )
+    # Custom CSS for better styling
+    st.markdown("""
+    <style>
+    .main {
+        padding-top: 1rem;
+    }
+    .stAlert {
+        margin-top: 1rem;
+    }
+    .metric-container {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 0.5rem 0;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+    # Check authentication
+    if check_password():
+        main_app()
+if __name__ == "__main__":  # start the app only when this file is run as a script, not when imported
+    main()