Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 6, 2025

Commit

6b934fc

verified ·

1 Parent(s): 5ba4816

Update app.py

Browse files

Files changed (1) hide show

app.py +306 -412

app.py CHANGED Viewed

@@ -2,106 +2,110 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.svm import LinearSVC, SVC
-from sklearn.naive_bayes import MultinomialNB, GaussianNB
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 import os
 import pickle
-import tempfile
-import re
-import string
-from collections import Counter
-# Text Cleaning Class (replacing the custom module)
-class TextCleaner:
-    def clean_text(self, text):
-        """Clean and preprocess text"""
-        if pd.isna(text):
-            return ""
-        # Convert to lowercase
-        text = str(text).lower()
-        # Remove special characters and digits
-        text = re.sub(r'[^a-zA-Z\s]', '', text)
-        # Remove extra whitespace
-        text = ' '.join(text.split())
-        return text
-# Information Analysis Class (replacing the custom module)
-class TextInformations:
-    def __init__(self, df, text_col, target_col):
-        self.df = df
-        self.text_col = text_col
-        self.target_col = target_col
-    def shape(self):
-        return self.df.shape
-    def missing_values(self):
-        return self.df.isnull().sum().to_dict()
-    def class_imbalanced(self):
-        return self.df[self.target_col].value_counts().to_dict()
-    def clean_text(self):
-        cleaner = TextCleaner()
-        return self.df[self.text_col].apply(cleaner.clean_text)
-    def text_length(self):
-        return self.df[self.text_col].str.len()
 # Utility functions
-def save_to_session(obj, key):
-    """Save objects to session state instead of files"""
-    st.session_state[key] = obj
-def load_from_session(key):
-    """Load objects from session state"""
-    return st.session_state.get(key, None)
-def train_model(model_name, X_train, X_test, y_train, y_test):
-    """Train the selected model"""
-    if model_name == "Logistic Regression":
-        model = LogisticRegression(random_state=42, max_iter=1000)
-    elif model_name == "Decision Tree":
-        model = DecisionTreeClassifier(random_state=42)
-    elif model_name == "Random Forest":
-        model = RandomForestClassifier(random_state=42, n_estimators=100)
-    elif model_name == "Linear SVC":
-        model = LinearSVC(random_state=42, max_iter=1000)
-    elif model_name == "SVC":
-        model = SVC(random_state=42, probability=True)
-    elif model_name == "Multinomial Naive Bayes":
-        model = MultinomialNB()
-    elif model_name == "Gaussian Naive Bayes":
-        model = GaussianNB()
-    # Train model
-    model.fit(X_train, y_train)
-    # Make predictions
-    y_pred = model.predict(X_test)
-    accuracy = accuracy_score(y_test, y_pred)
-    return model, accuracy
-def predict_text(text, model, vectorizer, encoder):
     """Make prediction on new text"""
     try:
-        # Clean text
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
-        # Transform text using the vectorizer
         text_vector = vectorizer.transform([clean_text])
         # Make prediction
@@ -124,425 +128,315 @@ def predict_text(text, model, vectorizer, encoder):
         st.error(f"Error during prediction: {str(e)}")
         return None, None
-# Streamlit App Configuration
-st.set_page_config(
-    page_title="Text Classification App",
-    page_icon="📝",
-    layout="wide"
-)
 st.title('📝 No Code Text Classification App')
-st.markdown('Analyze your text data and train machine learning models for text classification')
-# Initialize session state
-if 'model_trained' not in st.session_state:
-    st.session_state.model_trained = False
-if 'training_data_processed' not in st.session_state:
-    st.session_state.training_data_processed = False
 # Sidebar
 st.sidebar.title("Navigation")
-section = st.sidebar.radio(
-    "Choose Section",
-    ["📊 Data Analysis", "🤖 Train Model", "🔮 Predictions"],
-    index=0
-)
-# Upload Data Section
-st.sidebar.markdown("---")
 st.sidebar.subheader("📁 Upload Your Dataset")
-# File uploader with better error handling
-try:
-    train_data = st.sidebar.file_uploader(
-        "Upload training data (CSV)",
-        type=["csv"],
-        help="Upload a CSV file with text and labels for training"
-    )
-    test_data = st.sidebar.file_uploader(
-        "Upload test data (CSV, optional)",
-        type=["csv"],
-        help="Optional: Upload a separate test dataset"
-    )
-except Exception as e:
-    st.sidebar.error(f"File upload error: {str(e)}")
-    st.sidebar.info("Try refreshing the page or using a different browser")
 # Process uploaded data
 if train_data is not None:
     try:
-        # Add encoding options to handle different CSV formats
-        encoding_option = st.sidebar.selectbox(
-            "CSV Encoding",
-            ["utf-8", "latin-1", "cp1252", "iso-8859-1"],
-            help="Try different encodings if you get errors"
-        )
-        train_df = pd.read_csv(train_data, encoding=encoding_option)
-        if test_data is not None:
-            test_df = pd.read_csv(test_data, encoding=encoding_option)
-        else:
-            test_df = None
-        st.sidebar.success(f"✅ Training data loaded: {train_df.shape[0]} rows, {train_df.shape[1]} columns")
-        # Column selection
-        columns = train_df.columns.tolist()
-        text_data = st.sidebar.selectbox("📝 Choose the text column:", columns)
-        target = st.sidebar.selectbox("🎯 Choose the target column:", columns)
-        # Store processed data in session state
-        st.session_state.train_df = train_df
-        st.session_state.test_df = test_df
-        st.session_state.text_col = text_data
-        st.session_state.target_col = target
-        st.session_state.training_data_processed = True
     except Exception as e:
-        st.sidebar.error(f"❌ Error loading data: {str(e)}")
-        st.sidebar.info("Please check your CSV file format and encoding")
 # Data Analysis Section
-if section == "📊 Data Analysis":
-    st.header("📊 Data Analysis")
-    if st.session_state.get('training_data_processed', False):
         try:
-            train_df = st.session_state.train_df
-            text_col = st.session_state.text_col
-            target_col = st.session_state.target_col
-            # Create info object
-            info = TextInformations(train_df, text_col, target_col)
-            # Data preprocessing
-            train_df['clean_text'] = info.clean_text()
-            train_df['text_length'] = info.text_length()
-            # Display basic information
             col1, col2, col3 = st.columns(3)
             with col1:
-                st.metric("Dataset Shape", f"{info.shape()[0]} × {info.shape()[1]}")
             with col2:
-                missing_vals = sum(info.missing_values().values())
-                st.metric("Missing Values", missing_vals)
             with col3:
-                unique_classes = len(info.class_imbalanced())
-                st.metric("Unique Classes", unique_classes)
-            # Data preview
-            st.subheader("📋 Data Preview")
-            st.dataframe(train_df[[text_col, target_col, 'clean_text', 'text_length']].head(10))
-            # Class distribution
-            st.subheader("📊 Class Distribution")
-            class_counts = info.class_imbalanced()
-            col1, col2 = st.columns(2)
-            with col1:
-                fig, ax = plt.subplots(figsize=(8, 6))
-                classes = list(class_counts.keys())
-                counts = list(class_counts.values())
-                ax.bar(classes, counts, color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8'])
-                ax.set_title('Class Distribution')
-                ax.set_xlabel('Classes')
-                ax.set_ylabel('Count')
-                plt.xticks(rotation=45)
-                st.pyplot(fig)
-            with col2:
-                st.write("**Class Distribution:**")
-                for class_name, count in class_counts.items():
-                    percentage = (count / len(train_df)) * 100
-                    st.write(f"- {class_name}: {count} ({percentage:.1f}%)")
-            # Text length analysis
-            st.subheader("📏 Text Length Analysis")
-            col1, col2 = st.columns(2)
-            with col1:
-                fig, ax = plt.subplots(figsize=(8, 6))
-                ax.hist(train_df['text_length'], bins=50, alpha=0.7, color='#4ECDC4')
-                ax.set_title('Text Length Distribution')
-                ax.set_xlabel('Text Length (characters)')
-                ax.set_ylabel('Frequency')
-                st.pyplot(fig)
-            with col2:
-                st.write("**Text Length Statistics:**")
-                length_stats = train_df['text_length'].describe()
-                for stat, value in length_stats.items():
-                    st.write(f"- {stat.title()}: {value:.1f}")
-            # Update session state
-            st.session_state.processed_train_df = train_df
         except Exception as e:
-            st.error(f"❌ Error in data analysis: {str(e)}")
     else:
-        st.info("🔄 Please upload training data to perform analysis")
 # Train Model Section
-elif section == "🤖 Train Model":
-    st.header("🤖 Train Model")
-    if st.session_state.get('training_data_processed', False):
         try:
-            if 'processed_train_df' in st.session_state:
-                train_df = st.session_state.processed_train_df
-            else:
-                # Process data if not already processed
-                train_df = st.session_state.train_df
-                text_col = st.session_state.text_col
-                target_col = st.session_state.target_col
-                info = TextInformations(train_df, text_col, target_col)
-                train_df['clean_text'] = info.clean_text()
-                train_df['text_length'] = info.text_length()
-            # Model and vectorizer selection
             col1, col2 = st.columns(2)
             with col1:
-                st.subheader("🎯 Model Selection")
-                model_name = st.selectbox("Choose the Model", [
                     "Logistic Regression", "Decision Tree",
                     "Random Forest", "Linear SVC", "SVC",
                     "Multinomial Naive Bayes", "Gaussian Naive Bayes"
                 ])
             with col2:
-                st.subheader("📊 Vectorizer Selection")
-                vectorizer_choice = st.selectbox("Choose Vectorizer", ["TF-IDF", "Count"])
-            # Training parameters
-            st.subheader("⚙️ Training Parameters")
-            col1, col2 = st.columns(2)
-            with col1:
-                max_features = st.slider("Max Features", 1000, 20000, 10000, 1000)
-                test_size = st.slider("Test Size", 0.1, 0.5, 0.2, 0.05)
-            with col2:
-                random_state = st.number_input("Random State", 0, 100, 42)
-            # Training button
             if st.button("🚀 Start Training", type="primary"):
-                with st.spinner("Training model... Please wait"):
                     try:
-                        # Prepare data
-                        X_text = train_df['clean_text'].fillna('')
-                        y = train_df[st.session_state.target_col]
-                        # Label encoding
-                        label_encoder = LabelEncoder()
-                        y_encoded = label_encoder.fit_transform(y)
-                        # Vectorization
-                        if vectorizer_choice == "TF-IDF":
-                            vectorizer = TfidfVectorizer(max_features=max_features, stop_words='english')
-                        else:
-                            vectorizer = CountVectorizer(max_features=max_features, stop_words='english')
-                        X_vectorized = vectorizer.fit_transform(X_text)
-                        # Train-test split
-                        X_train, X_test, y_train, y_test = train_test_split(
-                            X_vectorized, y_encoded,
-                            test_size=test_size,
-                            random_state=random_state,
-                            stratify=y_encoded
-                        )
-                        # Train model
-                        model, accuracy = train_model(model_name, X_train, X_test, y_train, y_test)
-                        # Save to session state
-                        save_to_session(model, 'trained_model')
-                        save_to_session(vectorizer, 'vectorizer')
-                        save_to_session(label_encoder, 'label_encoder')
-                        save_to_session(model_name, 'model_name')
-                        save_to_session(vectorizer_choice, 'vectorizer_type')
-                        st.session_state.model_trained = True
-                        # Display results
-                        st.success(f"✅ Model training completed!")
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            st.metric("Model Accuracy", f"{accuracy:.4f}")
-                        with col2:
-                            st.metric("Training Samples", len(X_train))
-                        st.info("🎉 You can now use the 'Predictions' section to classify new text!")
                     except Exception as e:
-                        st.error(f"❌ Error during training: {str(e)}")
         except Exception as e:
-            st.error(f"❌ Error in model training setup: {str(e)}")
     else:
-        st.info("🔄 Please upload and analyze training data first")
 # Predictions Section
-elif section == "🔮 Predictions":
-    st.header("🔮 Make Predictions")
-    if st.session_state.get('model_trained', False):
-        # Single text prediction
-        st.subheader("📝 Single Text Prediction")
-        text_input = st.text_area(
-            "Enter text to classify:",
-            height=120,
-            placeholder="Type or paste your text here..."
-        )
-        col1, col2 = st.columns([1, 3])
-        with col1:
-            if st.button("🔮 Predict", type="primary"):
                 if text_input.strip():
-                    try:
-                        model = load_from_session('trained_model')
-                        vectorizer = load_from_session('vectorizer')
-                        encoder = load_from_session('label_encoder')
                         predicted_label, prediction_proba = predict_text(
-                            text_input, model, vectorizer, encoder
                         )
                         if predicted_label is not None:
                             st.success("✅ Prediction completed!")
                             # Display results
-                            st.markdown("### 📊 Results")
-                            st.markdown(f"**Predicted Class:** `{predicted_label}`")
                             # Display probabilities if available
                             if prediction_proba is not None:
                                 st.markdown("**Class Probabilities:**")
-                                classes = encoder.classes_
-                                prob_data = pd.DataFrame({
-                                    'Class': classes,
-                                    'Probability': prediction_proba
-                                }).sort_values('Probability', ascending=False)
-                                # Show as bar chart
-                                st.bar_chart(prob_data.set_index('Class'))
-                                # Show as table
-                                st.dataframe(prob_data, use_container_width=True)
-                    except Exception as e:
-                        st.error(f"❌ Prediction error: {str(e)}")
                 else:
                     st.warning("⚠️ Please enter some text to classify")
-        # Batch predictions
-        st.markdown("---")
-        st.subheader("📁 Batch Predictions")
-        uploaded_batch = st.file_uploader(
-            "Upload CSV file for batch predictions",
-            type=['csv'],
-            help="Upload a CSV file with text data to classify multiple texts at once"
-        )
-        if uploaded_batch is not None:
-            try:
-                # Load batch data
-                encoding_option = st.selectbox(
-                    "Batch CSV Encoding",
-                    ["utf-8", "latin-1", "cp1252", "iso-8859-1"],
-                    key="batch_encoding"
-                )
-                batch_df = pd.read_csv(uploaded_batch, encoding=encoding_option)
-                st.write("📋 **Batch Data Preview:**")
-                st.dataframe(batch_df.head())
                 # Select text column
-                text_column = st.selectbox(
-                    "Select the text column:",
-                    batch_df.columns.tolist()
-                )
-                if st.button("🚀 Run Batch Predictions", type="primary"):
-                    with st.spinner("Processing batch predictions..."):
-                        try:
-                            model = load_from_session('trained_model')
-                            vectorizer = load_from_session('vectorizer')
-                            encoder = load_from_session('label_encoder')
                             predictions = []
-                            confidences = []
                             progress_bar = st.progress(0)
-                            total_rows = len(batch_df)
                             for idx, text in enumerate(batch_df[text_column]):
-                                pred, pred_proba = predict_text(
-                                    str(text), model, vectorizer, encoder
                                 )
                                 predictions.append(pred if pred is not None else "Error")
-                                # Get confidence (max probability)
-                                if pred_proba is not None:
-                                    confidences.append(max(pred_proba))
-                                else:
-                                    confidences.append(0.0)
-                                progress_bar.progress((idx + 1) / total_rows)
                             batch_df['Predicted_Class'] = predictions
-                            batch_df['Confidence'] = confidences
                             st.success("✅ Batch predictions completed!")
-                            # Show results
-                            st.write("📊 **Prediction Results:**")
-                            st.dataframe(batch_df[[text_column, 'Predicted_Class', 'Confidence']])
                             # Download results
                             csv = batch_df.to_csv(index=False)
                             st.download_button(
-                                label="📥 Download Results as CSV",
                                 data=csv,
                                 file_name="batch_predictions.csv",
                                 mime="text/csv"
                             )
-                        except Exception as e:
-                            st.error(f"❌ Batch prediction error: {str(e)}")
-            except Exception as e:
-                st.error(f"❌ Error loading batch file: {str(e)}")
-    else:
-        st.info("🔄 Please train a model first before making predictions")
-        # Show model info if available
-        if st.session_state.get('training_data_processed', False):
-            st.write("💡 **Tip:** Go to the 'Train Model' section to train a model first!")
-# Footer
-st.markdown("---")
-st.markdown(
-    """
-    <div style='text-align: center; color: #666; padding: 20px;'>
-        <p>📝 No Code Text Classification App</p>
-        <p>Built with Streamlit • Upload CSV → Analyze → Train → Predict</p>
-    </div>
-    """,
-    unsafe_allow_html=True
-)

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
+from NoCodeTextClassifier.EDA import Informations, Visualizations
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
+from NoCodeTextClassifier.models import Models
 import os
 import pickle
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+import io
+# Set page config
+st.set_page_config(page_title="Text Classification App", page_icon="📝", layout="wide")
 # Utility functions
def save_artifacts(obj, folder_name, file_name):
    """Pickle ``obj`` to ``folder_name/file_name``, creating the folder if needed.

    The object is serialized in memory before the destination file is
    opened, so an object that cannot be pickled does not truncate an
    artifact that was saved earlier under the same name.

    Args:
        obj: Any picklable object.
        folder_name: Directory to write into; created when it does not exist.
        file_name: Name of the pickle file inside ``folder_name``.

    Returns:
        True when the file was written, False when any step failed. Failures
        are reported through ``st.error`` instead of being raised.
    """
    try:
        # Serialize first: opening with 'wb' truncates the file immediately,
        # so a pickling failure after that point would leave an empty or
        # partial file in place of the previous good one.
        payload = pickle.dumps(obj)
        os.makedirs(folder_name, exist_ok=True)
        with open(os.path.join(folder_name, file_name), 'wb') as f:
            f.write(payload)
        return True
    except Exception as e:
        # Deliberately broad: this is a UI boundary that reports the problem
        # to the user and lets the app keep running.
        st.error(f"Error saving {file_name}: {str(e)}")
        return False
def load_artifacts(folder_name, file_name):
    """Read back a pickled artifact stored at ``folder_name/file_name``.

    Args:
        folder_name: Directory that holds the pickle file.
        file_name: Name of the pickle file to load.

    Returns:
        The unpickled object, or None when the file is missing or cannot be
        loaded. Either problem is reported through ``st.error``.
    """
    artifact = None
    try:
        artifact_path = os.path.join(folder_name, file_name)
        with open(artifact_path, 'rb') as handle:
            artifact = pickle.load(handle)
    except FileNotFoundError:
        st.error(f"File {file_name} not found in {folder_name} folder")
    except Exception as exc:
        st.error(f"Error loading {file_name}: {str(exc)}")
    # Still None here if either handler above ran.
    return artifact
def load_model(model_name):
    """Unpickle a trained model from the ``models`` directory.

    Args:
        model_name: File name of the pickled model inside ``models``.

    Returns:
        The unpickled model object, or None when the file is missing or
        cannot be loaded. Either problem is reported through ``st.error``.
    """
    try:
        model_path = os.path.join('models', model_name)
        with open(model_path, 'rb') as handle:
            trained = pickle.load(handle)
    except FileNotFoundError:
        st.error(f"Model {model_name} not found. Please train a model first.")
        return None
    except Exception as exc:
        st.error(f"Error loading model {model_name}: {str(exc)}")
        return None
    return trained
def safe_read_csv(uploaded_file, encoding_options=('utf-8', 'latin1', 'iso-8859-1', 'cp1252')):
    """Read an uploaded CSV into a DataFrame, trying several text encodings.

    Each encoding in ``encoding_options`` is tried in order: the file is
    rewound, read fully, decoded if the content is bytes, and parsed with
    ``pd.read_csv``. The first attempt that parses wins. If none does, one
    last attempt hands the file object straight to ``pd.read_csv``.

    Args:
        uploaded_file: File-like object supporting ``seek`` and ``read``.
        encoding_options: Encodings to try, in order. The default is a tuple
            rather than a list so it cannot be mutated between calls.

    Returns:
        The parsed DataFrame, or None when every attempt failed. Progress and
        failures are reported through ``st.success``, ``st.warning`` and
        ``st.error``.
    """
    # NOTE(review): 'latin1' (alias 'iso-8859-1') can decode any byte
    # sequence, so with the default order the entries after it are only
    # reached when parsing, not decoding, fails. Consider trying 'cp1252'
    # before 'latin1' if Windows-encoded files should be detected.
    for encoding in encoding_options:
        try:
            # Rewind: a previous attempt has already consumed the stream.
            uploaded_file.seek(0)
            content = uploaded_file.read()
            # Text-mode file objects already return str and need no decoding.
            if isinstance(content, bytes):
                content = content.decode(encoding)
            # Wrap the decoded text so pandas can read it like a file.
            df = pd.read_csv(io.StringIO(content))
            st.success(f"File loaded successfully with {encoding} encoding")
            return df
        except UnicodeDecodeError:
            # Wrong encoding for this file; silently try the next one.
            continue
        except Exception as e:
            st.warning(f"Failed to read with {encoding} encoding: {str(e)}")
            continue
    # Every listed encoding failed: let pandas try with its own defaults.
    try:
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
        st.success("File loaded with default encoding")
        return df
    except Exception as e:
        st.error(f"All encoding attempts failed. Error: {str(e)}")
        return None
+def predict_text(model_name, text, vectorizer_type="tfidf"):
     """Make prediction on new text"""
     try:
+        # Load model
+        model = load_model(model_name)
+        if model is None:
+            return None, None
+        # Load vectorizer
+        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
+        vectorizer = load_artifacts("artifacts", vectorizer_file)
+        if vectorizer is None:
+            return None, None
+        # Load label encoder
+        encoder = load_artifacts("artifacts", "encoder.pkl")
+        if encoder is None:
+            return None, None
+        # Clean and vectorize text
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
+        # Transform text using the same vectorizer used during training
         text_vector = vectorizer.transform([clean_text])
         # Make prediction
         st.error(f"Error during prediction: {str(e)}")
         return None, None
+# Streamlit App
 st.title('📝 No Code Text Classification App')
+st.write('Understand the behavior of your text data and train a model to classify the text data')
 # Sidebar
 st.sidebar.title("Navigation")
+section = st.sidebar.radio("Choose Section", ["Data Analysis", "Train Model", "Predictions"])
+# Upload Data
 st.sidebar.subheader("📁 Upload Your Dataset")
+train_data = st.sidebar.file_uploader("Upload training data", type=["csv"], key="train_upload")
+test_data = st.sidebar.file_uploader("Upload test data (optional)", type=["csv"], key="test_upload")
+# Global variables to store data and settings
+if 'vectorizer_type' not in st.session_state:
+    st.session_state.vectorizer_type = "tfidf"
+if 'train_df' not in st.session_state:
+    st.session_state.train_df = None
+if 'info' not in st.session_state:
+    st.session_state.info = None
 # Process uploaded data
 if train_data is not None:
     try:
+        # Use safe CSV reading function
+        train_df = safe_read_csv(train_data)
+        if train_df is not None:
+            st.session_state.train_df = train_df
+            if test_data is not None:
+                test_df = safe_read_csv(test_data)
+                st.session_state.test_df = test_df
+            else:
+                st.session_state.test_df = None
+            st.sidebar.success("✅ Data loaded successfully!")
+            st.write("Training Data Preview:")
+            st.write(train_df.head(3))
+            columns = train_df.columns.tolist()
+            text_data = st.sidebar.selectbox("Choose the text column:", columns, key="text_col")
+            target = st.sidebar.selectbox("Choose the target column:", columns, key="target_col")
+            if text_data and target:
+                try:
+                    # Process data
+                    info = Informations(train_df, text_data, target)
+                    train_df['clean_text'] = info.clean_text()
+                    train_df['text_length'] = info.text_length()
+                    # Handle label encoding manually
+                    from sklearn.preprocessing import LabelEncoder
+                    label_encoder = LabelEncoder()
+                    train_df['target'] = label_encoder.fit_transform(train_df[target])
+                    # Save label encoder for later use
+                    if save_artifacts(label_encoder, "artifacts", "encoder.pkl"):
+                        st.sidebar.success("✅ Data processed successfully!")
+                    st.session_state.train_df = train_df
+                    st.session_state.info = info
+                except Exception as e:
+                    st.error(f"Error processing data: {str(e)}")
+                    st.session_state.train_df = None
+                    st.session_state.info = None
     except Exception as e:
+        st.error(f"Error loading data: {str(e)}")
+        st.session_state.train_df = None
+        st.session_state.info = None
+# Get data from session state
+train_df = st.session_state.get('train_df')
+info = st.session_state.get('info')
 # Data Analysis Section
+if section == "Data Analysis":
+    if train_data is not None and train_df is not None:
         try:
+            st.subheader("📊 Get Insights from the Data")
             col1, col2, col3 = st.columns(3)
             with col1:
+                st.metric("Data Shape", f"{info.shape()[0]} rows × {info.shape()[1]} cols")
             with col2:
+                st.metric("Classes", len(train_df['target'].unique()))
             with col3:
+                st.metric("Missing Values", info.missing_values())
+            st.write("**Class Distribution:**", info.class_imbalanced())
+            st.write("**Processed Data Preview:**")
+            st.write(train_df[['clean_text', 'text_length', 'target']].head(3))
+            st.markdown("**Text Length Analysis**")
+            st.write(info.analysis_text_length('text_length'))
+            # Calculate correlation manually
+            correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
+            st.write(f"**Correlation between Text Length and Target:** {correlation:.4f}")
+            st.subheader("📈 Visualizations")
+            try:
+                columns = train_df.columns.tolist()
+                text_col = next((col for col in columns if 'text' in col.lower() or col in ['message', 'content', 'review']), columns[0])
+                target_col = next((col for col in columns if col in ['label', 'target', 'class', 'category']), columns[-1])
+                vis = Visualizations(train_df, text_col, target_col)
+                vis.class_distribution()
+                vis.text_length_distribution()
+            except Exception as e:
+                st.error(f"Error generating visualizations: {str(e)}")
         except Exception as e:
+            st.error(f"Error in data analysis: {str(e)}")
     else:
+        st.warning("⚠️ Please upload training data to get insights")
 # Train Model Section
+elif section == "Train Model":
+    if train_data is not None and train_df is not None:
         try:
+            st.subheader("🤖 Train a Model")
+            # Create two columns for model selection
             col1, col2 = st.columns(2)
             with col1:
+                st.markdown("**Select Model:**")
+                model = st.radio("Choose the Model", [
                     "Logistic Regression", "Decision Tree",
                     "Random Forest", "Linear SVC", "SVC",
                     "Multinomial Naive Bayes", "Gaussian Naive Bayes"
                 ])
             with col2:
+                st.markdown("**Select Vectorizer:**")
+                vectorizer_choice = st.radio("Choose Vectorizer", ["Tfidf Vectorizer", "Count Vectorizer"])
+            # Initialize vectorizer
+            if vectorizer_choice == "Tfidf Vectorizer":
+                vectorizer = TfidfVectorizer(max_features=10000, stop_words='english')
+                st.session_state.vectorizer_type = "tfidf"
+            else:
+                vectorizer = CountVectorizer(max_features=10000, stop_words='english')
+                st.session_state.vectorizer_type = "count"
+            st.write("**Training Data Preview:**")
+            st.write(train_df[['clean_text', 'target']].head(3))
+            # Vectorize text data
+            with st.spinner("Vectorizing text data..."):
+                X = vectorizer.fit_transform(train_df['clean_text'])
+                y = train_df['target']
+            # Split data
+            X_train, X_test, y_train, y_test = process.split_data(X, y)
+            st.write(f"**Data split** - Train: {X_train.shape}, Test: {X_test.shape}")
+            # Save vectorizer for later use
+            vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
+            save_artifacts(vectorizer, "artifacts", vectorizer_filename)
             if st.button("🚀 Start Training", type="primary"):
+                with st.spinner("Training model..."):
                     try:
+                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
+                        # Train selected model
+                        if model == "Logistic Regression":
+                            models.LogisticRegression()
+                        elif model == "Decision Tree":
+                            models.DecisionTree()
+                        elif model == "Linear SVC":
+                            models.LinearSVC()
+                        elif model == "SVC":
+                            models.SVC()
+                        elif model == "Multinomial Naive Bayes":
+                            models.MultinomialNB()
+                        elif model == "Random Forest":
+                            models.RandomForestClassifier()
+                        elif model == "Gaussian Naive Bayes":
+                            models.GaussianNB()
+                        st.success("🎉 Model training completed!")
+                        st.info("You can now use the 'Predictions' section to classify new text.")
                     except Exception as e:
+                        st.error(f"Error during model training: {str(e)}")
         except Exception as e:
+            st.error(f"Error in model training: {str(e)}")
     else:
+        st.warning("⚠️ Please upload training data to train a model")
 # Predictions Section
+elif section == "Predictions":
+    # Predictions page, single-text mode: pick a saved model from the "models"
+    # directory, classify the text typed by the user, and show the predicted
+    # class plus per-class probabilities when the model returns them.
+    st.subheader("🔮 Perform Predictions on New Text")
+    # Check if models exist
+    if os.path.exists("models") and os.listdir("models"):
+        # Text input for prediction
+        text_input = st.text_area("Enter the text to classify:", height=100, placeholder="Type your text here...")
+        # Model selection
+        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+        if available_models:
+            selected_model = st.selectbox("Choose the trained model:", available_models)
+            # Prediction button
+            if st.button("🎯 Predict", key="single_predict", type="primary"):
                 if text_input.strip():
+                    with st.spinner("Making prediction..."):
+                        # The vectorizer kind is the one stored in session state by the
+                        # training page; 'tfidf' is the fallback when nothing was stored.
                         predicted_label, prediction_proba = predict_text(
+                            selected_model,
+                            text_input,
+                            st.session_state.get('vectorizer_type', 'tfidf')
                         )
+                        # NOTE(review): nothing is rendered here when predicted_label is
+                        # None; presumably predict_text reports its own failure — confirm.
                         if predicted_label is not None:
                             st.success("✅ Prediction completed!")
                             # Display results
+                            st.markdown("### 📊 Prediction Results")
+                            col1, col2 = st.columns([2, 1])
+                            with col1:
+                                st.markdown(f"**Input Text:** {text_input}")
+                            with col2:
+                                st.markdown(f"**Predicted Class:** `{predicted_label}`")
                             # Display probabilities if available
                             if prediction_proba is not None:
+                                # Load encoder to get class names
+                                encoder = load_artifacts("artifacts", "encoder.pkl")
+                                # Flatten so a single-row 2-D result and a 1-D vector are
+                                # handled alike.
+                                probabilities = np.ravel(prediction_proba)
+                                # Draw the heading and table only when every probability can
+                                # be paired with a class name; otherwise the heading would
+                                # sit above nothing or the DataFrame construction would raise.
+                                if encoder is not None and len(encoder.classes_) == len(probabilities):
+                                    st.markdown("**Class Probabilities:**")
+                                    prob_df = pd.DataFrame({
+                                        'Class': encoder.classes_,
+                                        'Probability': probabilities
+                                    }).sort_values('Probability', ascending=False)
+                                    col1, col2 = st.columns(2)
+                                    with col1:
+                                        st.bar_chart(prob_df.set_index('Class'))
+                                    with col2:
+                                        st.dataframe(prob_df, use_container_width=True)
+                                else:
+                                    st.info("Class probabilities are not available for this prediction.")
                 else:
                     st.warning("⚠️ Please enter some text to classify")
+        else:
+            st.warning("⚠️ No trained models found. Please train a model first.")
+    else:
+        st.warning("⚠️ No trained models found. Please go to 'Train Model' section to train a model first.")
+    # Option to classify multiple texts
+    # Batch mode: classify every row of one column of an uploaded CSV with a
+    # saved model, show the results, and offer the annotated table as a download.
+    st.markdown("---")
+    st.subheader("📊 Batch Predictions")
+    uploaded_file = st.file_uploader("Upload a CSV file with text to classify", type=['csv'], key="batch_upload")
+    if uploaded_file is not None:
+        try:
+            batch_df = safe_read_csv(uploaded_file)
+            if batch_df is not None:
+                st.write("**Uploaded data preview:**")
+                st.write(batch_df.head())
                 # Select text column
+                text_column = st.selectbox("Select the text column:", batch_df.columns.tolist())
+                # List saved models once; a missing directory and a directory
+                # without .pkl files both count as "nothing trained yet".
+                available_models = (
+                    [f for f in os.listdir("models") if f.endswith('.pkl')]
+                    if os.path.isdir("models") else []
+                )
+                if not available_models:
+                    # Same warning as the single-text branch, instead of rendering
+                    # nothing or an empty selector whose value would be None.
+                    st.warning("⚠️ No trained models found. Please train a model first.")
+                else:
+                    batch_model = st.selectbox("Choose model for batch prediction:", available_models, key="batch_model")
+                    if st.button("🚀 Run Batch Predictions", key="batch_predict", type="primary"):
+                        with st.spinner("Processing batch predictions..."):
                             predictions = []
                             progress_bar = st.progress(0)
+                            # Loop invariants: row count for the progress fraction and the
+                            # vectorizer kind stored by the training page ('tfidf' fallback).
+                            total_rows = len(batch_df)
+                            vectorizer_type = st.session_state.get('vectorizer_type', 'tfidf')
                             for idx, text in enumerate(batch_df[text_column]):
+                                pred, _ = predict_text(
+                                    batch_model,
+                                    str(text),
+                                    vectorizer_type
                                 )
                                 # A None prediction is recorded as the literal "Error".
                                 predictions.append(pred if pred is not None else "Error")
+                                progress_bar.progress((idx + 1) / total_rows)
                             batch_df['Predicted_Class'] = predictions
                             st.success("✅ Batch predictions completed!")
+                            st.write("**Results:**")
+                            st.write(batch_df[[text_column, 'Predicted_Class']])
                             # Download results
+                            # NOTE(review): this results block only exists on the rerun caused
+                            # by the button click above; if clicking the download button
+                            # triggers another rerun the table disappears — consider keeping
+                            # the results in st.session_state.
                             csv = batch_df.to_csv(index=False)
                             st.download_button(
+                                label="📥 Download predictions as CSV",
                                 data=csv,
                                 file_name="batch_predictions.csv",
                                 mime="text/csv"
                             )
+        except Exception as e:
+            # Page-level boundary: surface any failure in the UI instead of crashing.
+            st.error(f"Error in batch prediction: {str(e)}")