Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 6, 2025

Commit

5ba4816

verified ·

1 Parent(s): 4d55e84

Update app.py

Browse files

Files changed (1) hide show

app.py +445 -233

app.py CHANGED Viewed

@@ -2,63 +2,106 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
-from NoCodeTextClassifier.EDA import Informations, Visualizations
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
-from NoCodeTextClassifier.models import Models
 import os
 import pickle
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 # Utility functions
-def save_artifacts(obj, folder_name, file_name):
-    """Save artifacts like encoders and vectorizers"""
-    os.makedirs(folder_name, exist_ok=True)
-    with open(os.path.join(folder_name, file_name), 'wb') as f:
-        pickle.dump(obj, f)
-def load_artifacts(folder_name, file_name):
-    """Load saved artifacts"""
-    try:
-        with open(os.path.join(folder_name, file_name), 'rb') as f:
-            return pickle.load(f)
-    except FileNotFoundError:
-        st.error(f"File {file_name} not found in {folder_name} folder")
-        return None
-def load_model(model_name):
-    """Load trained model"""
-    try:
-        with open(os.path.join('models', model_name), 'rb') as f:
-            return pickle.load(f)
-    except FileNotFoundError:
-        st.error(f"Model {model_name} not found. Please train a model first.")
-        return None
-def predict_text(model_name, text, vectorizer_type="tfidf"):
     """Make prediction on new text"""
     try:
-        # Load model
-        model = load_model(model_name)
-        if model is None:
-            return None, None
-        # Load vectorizer
-        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
-        vectorizer = load_artifacts("artifacts", vectorizer_file)
-        if vectorizer is None:
-            return None, None
-        # Load label encoder
-        encoder = load_artifacts("artifacts", "encoder.pkl")
-        if encoder is None:
-            return None, None
-        # Clean and vectorize text
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
-        # Transform text using the same vectorizer used during training
         text_vector = vectorizer.transform([clean_text])
         # Make prediction
@@ -81,256 +124,425 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         st.error(f"Error during prediction: {str(e)}")
         return None, None
-# Streamlit App
-st.title('No Code Text Classification App')
-st.write('Understand the behavior of your text data and train a model to classify the text data')
 # Sidebar
-section = st.sidebar.radio("Choose Section", ["Data Analysis", "Train Model", "Predictions"])
-# Upload Data
-st.sidebar.subheader("Upload Your Dataset")
-train_data = st.sidebar.file_uploader("Upload training data", type=["csv"])
-test_data = st.sidebar.file_uploader("Upload test data (optional)", type=["csv"])
-# Global variables to store data and settings
-if 'vectorizer_type' not in st.session_state:
-    st.session_state.vectorizer_type = "tfidf"
 if train_data is not None:
     try:
-        train_df = pd.read_csv(train_data, encoding='latin1')
         if test_data is not None:
-            test_df = pd.read_csv(test_data, encoding='latin1')
         else:
             test_df = None
-        st.write("Training Data Preview:")
-        st.write(train_df.head(3))
         columns = train_df.columns.tolist()
-        text_data = st.sidebar.selectbox("Choose the text column:", columns)
-        target = st.sidebar.selectbox("Choose the target column:", columns)
-        # Process data
-        info = Informations(train_df, text_data, target)
-        train_df['clean_text'] = info.clean_text()
-        train_df['text_length'] = info.text_length()
-        # Handle label encoding manually if the class doesn't store encoder
-        from sklearn.preprocessing import LabelEncoder
-        label_encoder = LabelEncoder()
-        train_df['target'] = label_encoder.fit_transform(train_df[target])
-        # Save label encoder for later use
-        os.makedirs("artifacts", exist_ok=True)
-        save_artifacts(label_encoder, "artifacts", "encoder.pkl")
     except Exception as e:
-        st.error(f"Error loading data: {str(e)}")
-        train_df = None
-        info = None
 # Data Analysis Section
-if section == "Data Analysis":
-    if train_data is not None and train_df is not None:
         try:
-            st.subheader("Get Insights from the Data")
-            st.write("Data Shape:", info.shape())
-            st.write("Class Imbalance:", info.class_imbalanced())
-            st.write("Missing Values:", info.missing_values())
-            st.write("Processed Data Preview:")
-            st.write(train_df[['clean_text', 'text_length', 'target']].head(3))
-            st.markdown("**Text Length Analysis**")
-            st.write(info.analysis_text_length('text_length'))
-            # Calculate correlation manually since we handled encoding separately
-            correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
-            st.write(f"Correlation between Text Length and Target: {correlation:.4f}")
-            st.subheader("Visualizations")
-            vis = Visualizations(train_df, text_data, target)
-            vis.class_distribution()
-            vis.text_length_distribution()
         except Exception as e:
-            st.error(f"Error in data analysis: {str(e)}")
     else:
-        st.warning("Please upload training data to get insights")
 # Train Model Section
-elif section == "Train Model":
-    if train_data is not None and train_df is not None:
         try:
-            st.subheader("Train a Model")
-            # Create two columns for model selection
             col1, col2 = st.columns(2)
             with col1:
-                model = st.radio("Choose the Model", [
                     "Logistic Regression", "Decision Tree",
                     "Random Forest", "Linear SVC", "SVC",
                     "Multinomial Naive Bayes", "Gaussian Naive Bayes"
                 ])
             with col2:
-                vectorizer_choice = st.radio("Choose Vectorizer", ["Tfidf Vectorizer", "Count Vectorizer"])
-            # Initialize vectorizer
-            if vectorizer_choice == "Tfidf Vectorizer":
-                vectorizer = TfidfVectorizer(max_features=10000)
-                st.session_state.vectorizer_type = "tfidf"
-            else:
-                vectorizer = CountVectorizer(max_features=10000)
-                st.session_state.vectorizer_type = "count"
-            st.write("Training Data Preview:")
-            st.write(train_df[['clean_text', 'target']].head(3))
-            # Vectorize text data
-            X = vectorizer.fit_transform(train_df['clean_text'])
-            y = train_df['target']
-            # Split data
-            X_train, X_test, y_train, y_test = process.split_data(X, y)
-            st.write(f"Data split - Train: {X_train.shape}, Test: {X_test.shape}")
-            # Save vectorizer for later use
-            vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
-            save_artifacts(vectorizer, "artifacts", vectorizer_filename)
-            if st.button("Start Training"):
-                with st.spinner("Training model..."):
-                    models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
-                    # Train selected model
-                    if model == "Logistic Regression":
-                        models.LogisticRegression()
-                    elif model == "Decision Tree":
-                        models.DecisionTree()
-                    elif model == "Linear SVC":
-                        models.LinearSVC()
-                    elif model == "SVC":
-                        models.SVC()
-                    elif model == "Multinomial Naive Bayes":
-                        models.MultinomialNB()
-                    elif model == "Random Forest":
-                        models.RandomForestClassifier()
-                    elif model == "Gaussian Naive Bayes":
-                        models.GaussianNB()
-                st.success("Model training completed!")
-                st.info("You can now use the 'Predictions' section to classify new text.")
         except Exception as e:
-            st.error(f"Error in model training: {str(e)}")
     else:
-        st.warning("Please upload training data to train a model")
 # Predictions Section
-elif section == "Predictions":
-    st.subheader("Perform Predictions on New Text")
-    # Check if models exist
-    if os.path.exists("models") and os.listdir("models"):
-        # Text input for prediction
-        text_input = st.text_area("Enter the text to classify:", height=100)
-        # Model selection
-        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-        if available_models:
-            selected_model = st.selectbox("Choose the trained model:", available_models)
-            # Prediction button
-            if st.button("Predict", key="single_predict"):
                 if text_input.strip():
-                    with st.spinner("Making prediction..."):
                         predicted_label, prediction_proba = predict_text(
-                            selected_model,
-                            text_input,
-                            st.session_state.get('vectorizer_type', 'tfidf')
                         )
                         if predicted_label is not None:
-                            st.success("Prediction completed!")
                             # Display results
-                            st.markdown("### Prediction Results")
-                            st.markdown(f"**Input Text:** {text_input}")
-                            st.markdown(f"**Predicted Class:** {predicted_label}")
                             # Display probabilities if available
                             if prediction_proba is not None:
                                 st.markdown("**Class Probabilities:**")
-                                # Load encoder to get class names
-                                encoder = load_artifacts("artifacts", "encoder.pkl")
-                                if encoder is not None:
-                                    classes = encoder.classes_
-                                    prob_df = pd.DataFrame({
-                                        'Class': classes,
-                                        'Probability': prediction_proba
-                                    }).sort_values('Probability', ascending=False)
-                                    st.bar_chart(prob_df.set_index('Class'))
-                                    st.dataframe(prob_df)
                 else:
-                    st.warning("Please enter some text to classify")
-        else:
-            st.warning("No trained models found. Please train a model first.")
-    else:
-        st.warning("No trained models found. Please go to 'Train Model' section to train a model first.")
-    # Option to classify multiple texts
-    st.markdown("---")
-    st.subheader("Batch Predictions")
-    uploaded_file = st.file_uploader("Upload a CSV file with text to classify", type=['csv'])
-    if uploaded_file is not None:
-        try:
-            batch_df = pd.read_csv(uploaded_file, encoding='latin1')
-            st.write("Uploaded data preview:")
-            st.write(batch_df.head())
-            # Select text column
-            text_column = st.selectbox("Select the text column:", batch_df.columns.tolist())
-            if os.path.exists("models") and os.listdir("models"):
-                available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-                batch_model = st.selectbox("Choose model for batch prediction:", available_models, key="batch_model")
-                if st.button("Run Batch Predictions", key="batch_predict"):
                     with st.spinner("Processing batch predictions..."):
-                        predictions = []
-                        for text in batch_df[text_column]:
-                            pred, _ = predict_text(
-                                batch_model,
-                                str(text),
-                                st.session_state.get('vectorizer_type', 'tfidf')
                             )
-                            predictions.append(pred if pred is not None else "Error")
-                        batch_df['Predicted_Class'] = predictions
-                        st.success("Batch predictions completed!")
-                        st.write("Results:")
-                        st.write(batch_df[[text_column, 'Predicted_Class']])
-                        # Download results
-                        csv = batch_df.to_csv(index=False)
-                        st.download_button(
-                            label="Download predictions as CSV",
-                            data=csv,
-                            file_name="batch_predictions.csv",
-                            mime="text/csv"
-                        )
-        except Exception as e:
-            st.error(f"Error in batch prediction: {str(e)}")

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+from sklearn.preprocessing import LabelEncoder
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import LinearSVC, SVC
+from sklearn.naive_bayes import MultinomialNB, GaussianNB
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 import os
 import pickle
+import tempfile
+import re
+import string
+from collections import Counter
+# Text Cleaning Class (replacing the custom module)
class TextCleaner:
    """Normalise raw text before it is vectorised."""

    # Matches everything that is not a letter or whitespace: punctuation and
    # symbols (``[^\w\s]``) plus the digits and underscore that ``\w`` keeps.
    # Being Unicode-aware, it preserves accented and non-Latin letters instead
    # of silently deleting them as an ASCII-only ``[^a-zA-Z\s]`` class would.
    _NON_LETTER = re.compile(r'[^\w\s]|[\d_]')

    def clean_text(self, text):
        """Return ``text`` lower-cased, letters only, with single spaces.

        Args:
            text: Any value; non-strings are converted with ``str()``.

        Returns:
            The cleaned string, or ``""`` when ``text`` is a missing scalar
            (``None``, ``NaN``, ``pd.NA``).
        """
        # pd.isna() returns an array for list-like input, which cannot be
        # used in a boolean test; only a 0-d result can mean "missing".
        missing = pd.isna(text)
        if getattr(missing, "ndim", 0) == 0 and missing:
            return ""
        lowered = str(text).lower()
        letters_only = self._NON_LETTER.sub('', lowered)
        # split()/join collapses runs of whitespace and trims both ends.
        return ' '.join(letters_only.split())
+# Information Analysis Class (replacing the custom module)
class TextInformations:
    """Summary helpers for a text-classification DataFrame.

    Args:
        df: The dataset to describe. It is stored by reference, not copied.
        text_col: Name of the column holding the raw text.
        target_col: Name of the column holding the class labels.
    """

    def __init__(self, df, text_col, target_col):
        self.df = df
        self.text_col = text_col
        self.target_col = target_col

    def shape(self):
        """Return ``(rows, columns)`` of the DataFrame."""
        return self.df.shape

    def missing_values(self):
        """Return a ``{column: missing-cell count}`` dict."""
        return self.df.isnull().sum().to_dict()

    def class_imbalanced(self):
        """Return a ``{label: row count}`` dict for the target column."""
        return self.df[self.target_col].value_counts().to_dict()

    def clean_text(self):
        """Return the text column with ``TextCleaner.clean_text`` applied."""
        cleaner = TextCleaner()
        return self.df[self.text_col].apply(cleaner.clean_text)

    def text_length(self):
        """Return the character length of each text cell as integers.

        Cells are converted with ``str`` first so numeric columns work (the
        ``.str`` accessor raises on non-string dtypes). Missing cells count
        as length 0, matching ``clean_text`` which maps them to ``""``.
        """
        column = self.df[self.text_col]
        lengths = column.astype(str).str.len()
        # astype(str) turns missing values into text such as "nan"; mask
        # those rows back to 0 instead of reporting a bogus length.
        return lengths.where(column.notna(), 0)
 # Utility functions
def save_to_session(obj, key):
    """Store ``obj`` in Streamlit's session state under ``key``.

    Session state is used in place of files on disk.
    """
    session = st.session_state
    session[key] = obj
def load_from_session(key):
    """Return the session-state value stored under ``key``, or ``None``."""
    session = st.session_state
    return session.get(key)
def _to_dense(X):
    """Return ``X`` as a dense array, converting SciPy sparse input."""
    return X.toarray() if hasattr(X, "toarray") else X


def train_model(model_name, X_train, X_test, y_train, y_test):
    """Fit the classifier selected by ``model_name`` and score it.

    Args:
        model_name: One of "Logistic Regression", "Decision Tree",
            "Random Forest", "Linear SVC", "SVC", "Multinomial Naive Bayes"
            or "Gaussian Naive Bayes".
        X_train: Training feature matrix (dense or SciPy sparse).
        X_test: Held-out feature matrix used for scoring.
        y_train: Training labels.
        y_test: Held-out labels.

    Returns:
        ``(model, accuracy)``: the fitted estimator and its accuracy on the
        held-out split.

    Raises:
        ValueError: If ``model_name`` is not a supported model.
    """
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import FunctionTransformer

    # Factories rather than instances, so only the chosen model is built.
    factories = {
        "Logistic Regression": lambda: LogisticRegression(random_state=42, max_iter=1000),
        "Decision Tree": lambda: DecisionTreeClassifier(random_state=42),
        "Random Forest": lambda: RandomForestClassifier(random_state=42, n_estimators=100),
        "Linear SVC": lambda: LinearSVC(random_state=42, max_iter=1000),
        "SVC": lambda: SVC(random_state=42, probability=True),
        "Multinomial Naive Bayes": lambda: MultinomialNB(),
        # GaussianNB rejects sparse matrices, which is what the text
        # vectorizers produce. Densifying inside a pipeline lets both
        # fit() and later predict() calls accept sparse input.
        "Gaussian Naive Bayes": lambda: make_pipeline(
            FunctionTransformer(_to_dense, accept_sparse=True),
            GaussianNB(),
        ),
    }
    try:
        build = factories[model_name]
    except KeyError:
        # Previously an unknown name fell through to an unbound ``model``.
        raise ValueError(f"Unknown model: {model_name!r}") from None

    model = build()
    model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test))
    return model, accuracy
+def predict_text(text, model, vectorizer, encoder):
     """Make prediction on new text"""
     try:
+        # Clean text
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
+        # Transform text using the vectorizer
         text_vector = vectorizer.transform([clean_text])
         # Make prediction
         st.error(f"Error during prediction: {str(e)}")
         return None, None
# Streamlit page configuration and header
_page_settings = {
    "page_title": "Text Classification App",
    "page_icon": "📝",
    "layout": "wide",
}
st.set_page_config(**_page_settings)
st.title('📝 No Code Text Classification App')
st.markdown('Analyze your text data and train machine learning models for text classification')

# Seed the workflow flags once per session; the sections below read them to
# decide whether data has been loaded and whether a model has been trained.
for _flag in ('model_trained', 'training_data_processed'):
    if _flag not in st.session_state:
        st.session_state[_flag] = False

# Sidebar navigation
_sidebar = st.sidebar
_sidebar.title("Navigation")
_section_names = ["📊 Data Analysis", "🤖 Train Model", "🔮 Predictions"]
section = _sidebar.radio("Choose Section", _section_names, index=0)

# Heading for the dataset upload widgets
_sidebar.markdown("---")
_sidebar.subheader("📁 Upload Your Dataset")
# File uploaders. Both handles default to None so that, if a widget fails to
# render, the check below still runs instead of raising NameError.
train_data = None
test_data = None
try:
    train_data = st.sidebar.file_uploader(
        "Upload training data (CSV)",
        type=["csv"],
        help="Upload a CSV file with text and labels for training"
    )
    test_data = st.sidebar.file_uploader(
        "Upload test data (CSV, optional)",
        type=["csv"],
        help="Optional: Upload a separate test dataset"
    )
except Exception as e:
    st.sidebar.error(f"File upload error: {str(e)}")
    st.sidebar.info("Try refreshing the page or using a different browser")

# Process uploaded data
if train_data is not None:
    try:
        # Let the user pick the encoding, since CSV exports vary.
        encoding_option = st.sidebar.selectbox(
            "CSV Encoding",
            ["utf-8", "latin-1", "cp1252", "iso-8859-1"],
            help="Try different encodings if you get errors"
        )
        # Rewind first so the buffer is always parsed from the beginning.
        train_data.seek(0)
        train_df = pd.read_csv(train_data, encoding=encoding_option)
        if test_data is not None:
            test_data.seek(0)
            test_df = pd.read_csv(test_data, encoding=encoding_option)
        else:
            test_df = None
        st.sidebar.success(f"✅ Training data loaded: {train_df.shape[0]} rows, {train_df.shape[1]} columns")

        # Column selection
        columns = train_df.columns.tolist()
        text_data = st.sidebar.selectbox("📝 Choose the text column:", columns)
        target = st.sidebar.selectbox("🎯 Choose the target column:", columns)
        if text_data == target:
            # Both selectboxes default to the first column; training on the
            # label itself would give a meaningless model.
            st.sidebar.warning("⚠️ The text and target columns are the same; choose different columns before training")

        # Publish the loaded data for the Analysis / Train sections
        st.session_state.train_df = train_df
        st.session_state.test_df = test_df
        st.session_state.text_col = text_data
        st.session_state.target_col = target
        st.session_state.training_data_processed = True
    except Exception as e:
        # Clear the flag so the other sections do not keep working on data
        # from an earlier, successful load while an error is displayed.
        st.session_state.training_data_processed = False
        st.sidebar.error(f"❌ Error loading data: {str(e)}")
        st.sidebar.info("Please check your CSV file format and encoding")
 # Data Analysis Section
if section == "📊 Data Analysis":
    st.header("📊 Data Analysis")
    if st.session_state.get('training_data_processed', False):
        try:
            train_df = st.session_state.train_df
            text_col = st.session_state.text_col
            target_col = st.session_state.target_col
            info = TextInformations(train_df, text_col, target_col)

            # Take the headline numbers before the derived columns are
            # added, so they describe the uploaded data only.
            n_rows, n_cols = info.shape()
            missing_vals = int(sum(info.missing_values().values()))
            class_counts = info.class_imbalanced()

            # Derived columns used by the preview and the length analysis
            train_df['clean_text'] = info.clean_text()
            train_df['text_length'] = info.text_length()

            # Basic information
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Dataset Shape", f"{n_rows} × {n_cols}")
            with col2:
                st.metric("Missing Values", missing_vals)
            with col3:
                st.metric("Unique Classes", len(class_counts))

            # Data preview
            st.subheader("📋 Data Preview")
            st.dataframe(train_df[[text_col, target_col, 'clean_text', 'text_length']].head(10))

            # Class distribution
            st.subheader("📊 Class Distribution")
            col1, col2 = st.columns(2)
            with col1:
                fig, ax = plt.subplots(figsize=(8, 6))
                # Labels as strings, so numeric classes are drawn as
                # categories rather than positions on a numeric axis.
                classes = [str(label) for label in class_counts]
                counts = list(class_counts.values())
                ax.bar(classes, counts, color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8'])
                ax.set_title('Class Distribution')
                ax.set_xlabel('Classes')
                ax.set_ylabel('Count')
                ax.tick_params(axis='x', labelrotation=45)
                st.pyplot(fig)
                # The script reruns on every interaction; close the figure
                # so pyplot does not accumulate one per rerun.
                plt.close(fig)
            with col2:
                st.write("**Class Distribution:**")
                for class_name, count in class_counts.items():
                    percentage = (count / len(train_df)) * 100
                    st.write(f"- {class_name}: {count} ({percentage:.1f}%)")

            # Text length analysis
            st.subheader("📏 Text Length Analysis")
            col1, col2 = st.columns(2)
            with col1:
                fig, ax = plt.subplots(figsize=(8, 6))
                # NaN lengths would make the histogram range non-finite.
                ax.hist(train_df['text_length'].dropna(), bins=50, alpha=0.7, color='#4ECDC4')
                ax.set_title('Text Length Distribution')
                ax.set_xlabel('Text Length (characters)')
                ax.set_ylabel('Frequency')
                st.pyplot(fig)
                plt.close(fig)
            with col2:
                st.write("**Text Length Statistics:**")
                length_stats = train_df['text_length'].describe()
                for stat, value in length_stats.items():
                    st.write(f"- {stat.title()}: {value:.1f}")

            # Keep the processed frame available to the other sections
            st.session_state.processed_train_df = train_df
        except Exception as e:
            st.error(f"❌ Error in data analysis: {str(e)}")
    else:
        st.info("🔄 Please upload training data to perform analysis")
 # Train Model Section
+elif section == "🤖 Train Model":
+    st.header("🤖 Train Model")
+    if st.session_state.get('training_data_processed', False):
         try:
+            if 'processed_train_df' in st.session_state:
+                train_df = st.session_state.processed_train_df
+            else:
+                # Process data if not already processed
+                train_df = st.session_state.train_df
+                text_col = st.session_state.text_col
+                target_col = st.session_state.target_col
+                info = TextInformations(train_df, text_col, target_col)
+                train_df['clean_text'] = info.clean_text()
+                train_df['text_length'] = info.text_length()
+            # Model and vectorizer selection
             col1, col2 = st.columns(2)
             with col1:
+                st.subheader("🎯 Model Selection")
+                model_name = st.selectbox("Choose the Model", [
                     "Logistic Regression", "Decision Tree",
                     "Random Forest", "Linear SVC", "SVC",
                     "Multinomial Naive Bayes", "Gaussian Naive Bayes"
                 ])
             with col2:
+                st.subheader("📊 Vectorizer Selection")
+                vectorizer_choice = st.selectbox("Choose Vectorizer", ["TF-IDF", "Count"])
+            # Training parameters
+            st.subheader("⚙️ Training Parameters")
+            col1, col2 = st.columns(2)
+            with col1:
+                max_features = st.slider("Max Features", 1000, 20000, 10000, 1000)
+                test_size = st.slider("Test Size", 0.1, 0.5, 0.2, 0.05)
+            with col2:
+                random_state = st.number_input("Random State", 0, 100, 42)
+            # Training button
+            if st.button("🚀 Start Training", type="primary"):
+                with st.spinner("Training model... Please wait"):
+                    try:
+                        # Prepare data
+                        X_text = train_df['clean_text'].fillna('')
+                        y = train_df[st.session_state.target_col]
+                        # Label encoding
+                        label_encoder = LabelEncoder()
+                        y_encoded = label_encoder.fit_transform(y)
+                        # Vectorization
+                        if vectorizer_choice == "TF-IDF":
+                            vectorizer = TfidfVectorizer(max_features=max_features, stop_words='english')
+                        else:
+                            vectorizer = CountVectorizer(max_features=max_features, stop_words='english')
+                        X_vectorized = vectorizer.fit_transform(X_text)
+                        # Train-test split
+                        X_train, X_test, y_train, y_test = train_test_split(
+                            X_vectorized, y_encoded,
+                            test_size=test_size,
+                            random_state=random_state,
+                            stratify=y_encoded
+                        )
+                        # Train model
+                        model, accuracy = train_model(model_name, X_train, X_test, y_train, y_test)
+                        # Save to session state
+                        save_to_session(model, 'trained_model')
+                        save_to_session(vectorizer, 'vectorizer')
+                        save_to_session(label_encoder, 'label_encoder')
+                        save_to_session(model_name, 'model_name')
+                        save_to_session(vectorizer_choice, 'vectorizer_type')
+                        st.session_state.model_trained = True
+                        # Display results
+                        st.success(f"✅ Model training completed!")
+                        col1, col2 = st.columns(2)
+                        with col1:
+                            st.metric("Model Accuracy", f"{accuracy:.4f}")
+                        with col2:
+                            st.metric("Training Samples", len(X_train))
+                        st.info("🎉 You can now use the 'Predictions' section to classify new text!")
+                    except Exception as e:
+                        st.error(f"❌ Error during training: {str(e)}")
         except Exception as e:
+            st.error(f"❌ Error in model training setup: {str(e)}")
     else:
+        st.info("🔄 Please upload and analyze training data first")
 # Predictions Section
+elif section == "🔮 Predictions":
+    st.header("🔮 Make Predictions")
+    if st.session_state.get('model_trained', False):
+        # Single text prediction
+        st.subheader("📝 Single Text Prediction")
+        text_input = st.text_area(
+            "Enter text to classify:",
+            height=120,
+            placeholder="Type or paste your text here..."
+        )
+        col1, col2 = st.columns([1, 3])
+        with col1:
+            if st.button("🔮 Predict", type="primary"):
                 if text_input.strip():
+                    try:
+                        model = load_from_session('trained_model')
+                        vectorizer = load_from_session('vectorizer')
+                        encoder = load_from_session('label_encoder')
                         predicted_label, prediction_proba = predict_text(
+                            text_input, model, vectorizer, encoder
                         )
                         if predicted_label is not None:
+                            st.success("✅ Prediction completed!")
                             # Display results
+                            st.markdown("### 📊 Results")
+                            st.markdown(f"**Predicted Class:** `{predicted_label}`")
                             # Display probabilities if available
                             if prediction_proba is not None:
                                 st.markdown("**Class Probabilities:**")
+                                classes = encoder.classes_
+                                prob_data = pd.DataFrame({
+                                    'Class': classes,
+                                    'Probability': prediction_proba
+                                }).sort_values('Probability', ascending=False)
+                                # Show as bar chart
+                                st.bar_chart(prob_data.set_index('Class'))
+                                # Show as table
+                                st.dataframe(prob_data, use_container_width=True)
+                    except Exception as e:
+                        st.error(f"❌ Prediction error: {str(e)}")
                 else:
+                    st.warning("⚠️ Please enter some text to classify")
+        # Batch predictions
+        st.markdown("---")
+        st.subheader("📁 Batch Predictions")
+        uploaded_batch = st.file_uploader(
+            "Upload CSV file for batch predictions",
+            type=['csv'],
+            help="Upload a CSV file with text data to classify multiple texts at once"
+        )
+        if uploaded_batch is not None:
+            try:
+                # Load batch data
+                encoding_option = st.selectbox(
+                    "Batch CSV Encoding",
+                    ["utf-8", "latin-1", "cp1252", "iso-8859-1"],
+                    key="batch_encoding"
+                )
+                batch_df = pd.read_csv(uploaded_batch, encoding=encoding_option)
+                st.write("📋 **Batch Data Preview:**")
+                st.dataframe(batch_df.head())
+                # Select text column
+                text_column = st.selectbox(
+                    "Select the text column:",
+                    batch_df.columns.tolist()
+                )
+                if st.button("🚀 Run Batch Predictions", type="primary"):
                     with st.spinner("Processing batch predictions..."):
+                        try:
+                            model = load_from_session('trained_model')
+                            vectorizer = load_from_session('vectorizer')
+                            encoder = load_from_session('label_encoder')
+                            predictions = []
+                            confidences = []
+                            progress_bar = st.progress(0)
+                            total_rows = len(batch_df)
+                            for idx, text in enumerate(batch_df[text_column]):
+                                pred, pred_proba = predict_text(
+                                    str(text), model, vectorizer, encoder
+                                )
+                                predictions.append(pred if pred is not None else "Error")
+                                # Get confidence (max probability)
+                                if pred_proba is not None:
+                                    confidences.append(max(pred_proba))
+                                else:
+                                    confidences.append(0.0)
+                                progress_bar.progress((idx + 1) / total_rows)
+                            batch_df['Predicted_Class'] = predictions
+                            batch_df['Confidence'] = confidences
+                            st.success("✅ Batch predictions completed!")
+                            # Show results
+                            st.write("📊 **Prediction Results:**")
+                            st.dataframe(batch_df[[text_column, 'Predicted_Class', 'Confidence']])
+                            # Download results
+                            csv = batch_df.to_csv(index=False)
+                            st.download_button(
+                                label="📥 Download Results as CSV",
+                                data=csv,
+                                file_name="batch_predictions.csv",
+                                mime="text/csv"
                             )
+                        except Exception as e:
+                            st.error(f"❌ Batch prediction error: {str(e)}")
+            except Exception as e:
+                st.error(f"❌ Error loading batch file: {str(e)}")
+    else:
+        st.info("🔄 Please train a model first before making predictions")
+        # Show model info if available
+        if st.session_state.get('training_data_processed', False):
+            st.write("💡 **Tip:** Go to the 'Train Model' section to train a model first!")
+# Footer
# Page footer: a horizontal rule followed by a centred HTML caption.
_footer_html = """
    <div style='text-align: center; color: #666; padding: 20px;'>
        <p>📝 No Code Text Classification App</p>
        <p>Built with Streamlit • Upload CSV → Analyze → Train → Predict</p>
    </div>
    """
st.markdown("---")
st.markdown(_footer_html, unsafe_allow_html=True)