Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 6, 2025

Commit

3b9b877

verified ·

1 Parent(s): b136104

Update app.py

Browse files

Files changed (1) hide show

app.py +295 -431

app.py CHANGED Viewed

@@ -2,471 +2,335 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.svm import LinearSVC, SVC
-from sklearn.naive_bayes import MultinomialNB, GaussianNB
-from sklearn.preprocessing import LabelEncoder
-from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
-import re
-import string
-import nltk
 import os
 import pickle
-import io
-import base64
-# Download required NLTK data
-try:
-    nltk.data.find('corpora/stopwords')
-except LookupError:
-    nltk.download('stopwords', quiet=True)
-try:
-    nltk.data.find('corpora/wordnet')
-except LookupError:
-    nltk.download('wordnet', quiet=True)
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
-# Set page config
-st.set_page_config(
-    page_title="No Code Text Classification",
-    page_icon="📝",
-    layout="wide"
-)
-# Initialize session state
-if 'trained_model' not in st.session_state:
-    st.session_state.trained_model = None
-if 'vectorizer' not in st.session_state:
-    st.session_state.vectorizer = None
-if 'label_encoder' not in st.session_state:
-    st.session_state.label_encoder = None
-if 'vectorizer_type' not in st.session_state:
-    st.session_state.vectorizer_type = 'tfidf'
-if 'train_df' not in st.session_state:
-    st.session_state.train_df = None
-# Text cleaning class
-class TextCleaner:
-    def __init__(self):
-        self.stop_words = set(stopwords.words('english'))
-        self.lemmatizer = WordNetLemmatizer()
-    def clean_text(self, text):
-        if pd.isna(text):
-            return ""
-        # Convert to lowercase
-        text = str(text).lower()
-        # Remove URLs
-        text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
-        # Remove user mentions and hashtags
-        text = re.sub(r'@\w+|#\w+', '', text)
-        # Remove punctuation
-        text = text.translate(str.maketrans('', '', string.punctuation))
-        # Remove extra whitespace
-        text = re.sub(r'\s+', ' ', text).strip()
-        # Remove stopwords and lemmatize
-        words = text.split()
-        words = [self.lemmatizer.lemmatize(word) for word in words if word not in self.stop_words]
-        return ' '.join(words)
-# Utility functions
-def create_download_link(val, filename):
-    """Generate a download link for a file"""
-    b64 = base64.b64encode(val)
-    return f'<a href="data:application/octet-stream;base64,{b64.decode()}" download="{filename}">Download {filename}</a>'
-def safe_file_read(uploaded_file):
-    """Safely read uploaded file with multiple encoding attempts"""
     try:
-        # Try UTF-8 first
-        return pd.read_csv(uploaded_file, encoding='utf-8')
-    except UnicodeDecodeError:
         try:
-            # Try latin1
-            uploaded_file.seek(0)  # Reset file pointer
-            return pd.read_csv(uploaded_file, encoding='latin1')
-        except:
-            try:
-                # Try cp1252
-                uploaded_file.seek(0)
-                return pd.read_csv(uploaded_file, encoding='cp1252')
-            except Exception as e:
-                st.error(f"Error reading file: {str(e)}")
-                return None
-# Data Analysis Functions
-def get_data_insights(df, text_col, target_col):
-    """Get basic insights from the data"""
-    insights = {}
-    # Basic info
-    insights['shape'] = df.shape
-    insights['missing_values'] = df.isnull().sum().to_dict()
-    # Class distribution
-    insights['class_distribution'] = df[target_col].value_counts().to_dict()
-    # Text length analysis
-    df['text_length'] = df[text_col].astype(str).str.len()
-    insights['avg_text_length'] = df['text_length'].mean()
-    insights['min_text_length'] = df['text_length'].min()
-    insights['max_text_length'] = df['text_length'].max()
-    return insights
-def create_visualizations(df, text_col, target_col):
-    """Create visualizations for the data"""
-    # Class distribution
-    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
-    # Class distribution bar plot
-    class_counts = df[target_col].value_counts()
-    ax1.bar(class_counts.index, class_counts.values)
-    ax1.set_title('Class Distribution')
-    ax1.set_xlabel('Classes')
-    ax1.set_ylabel('Count')
-    ax1.tick_params(axis='x', rotation=45)
-    # Text length distribution
-    df['text_length'] = df[text_col].astype(str).str.len()
-    ax2.hist(df['text_length'], bins=30, alpha=0.7)
-    ax2.set_title('Text Length Distribution')
-    ax2.set_xlabel('Text Length')
-    ax2.set_ylabel('Frequency')
-    plt.tight_layout()
-    st.pyplot(fig)
-# Model Training Functions
-def train_model(X_train, X_test, y_train, y_test, model_name):
-    """Train the selected model"""
-    models = {
-        'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
-        'Decision Tree': DecisionTreeClassifier(random_state=42),
-        'Random Forest': RandomForestClassifier(random_state=42, n_estimators=100),
-        'Linear SVC': LinearSVC(random_state=42, max_iter=1000),
-        'SVC': SVC(random_state=42, probability=True),
-        'Multinomial Naive Bayes': MultinomialNB(),
-        'Gaussian Naive Bayes': GaussianNB()
-    }
-    model = models[model_name]
-    # Handle sparse matrices for Gaussian NB
-    if model_name == 'Gaussian Naive Bayes':
-        if hasattr(X_train, 'toarray'):
-            X_train = X_train.toarray()
-            X_test = X_test.toarray()
-    # Train model
-    model.fit(X_train, y_train)
-    # Make predictions
-    y_pred = model.predict(X_test)
-    # Calculate metrics
-    accuracy = accuracy_score(y_test, y_pred)
-    return model, accuracy, y_pred
-# Main App
-st.title('🔤 No Code Text Classification App')
-st.markdown('Upload your data, analyze it, train models, and make predictions without writing any code!')
-# Sidebar
-st.sidebar.header("📁 Data Upload")
-# File upload with better error handling
-train_data = st.sidebar.file_uploader(
-    "Upload training data (CSV)",
-    type=["csv"],
-    help="Upload a CSV file with text and labels"
-)
-# Process uploaded data
-if train_data is not None:
-    try:
-        with st.spinner("Loading data..."):
-            train_df = safe_file_read(train_data)
-        if train_df is not None:
-            st.session_state.train_df = train_df
-            st.sidebar.success(f"✅ Data loaded: {train_df.shape[0]} rows, {train_df.shape[1]} columns")
-            # Column selection
-            columns = train_df.columns.tolist()
-            text_col = st.sidebar.selectbox("📝 Select text column:", columns, key="text_col")
-            target_col = st.sidebar.selectbox("🎯 Select target column:", columns, key="target_col")
-            if text_col and target_col and text_col != target_col:
-                # Clean and prepare data
-                with st.spinner("Preprocessing data..."):
-                    text_cleaner = TextCleaner()
-                    train_df['clean_text'] = train_df[text_col].apply(text_cleaner.clean_text)
-                    # Encode labels
-                    label_encoder = LabelEncoder()
-                    train_df['encoded_target'] = label_encoder.fit_transform(train_df[target_col])
-                    st.session_state.label_encoder = label_encoder
-                # Main sections
-                tab1, tab2, tab3 = st.tabs(["📊 Data Analysis", "🤖 Train Model", "🔍 Predictions"])
-                # Data Analysis Tab
-                with tab1:
-                    st.header("📊 Data Analysis")
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.subheader("📈 Dataset Overview")
-                        insights = get_data_insights(train_df, text_col, target_col)
-                        st.metric("Total Samples", insights['shape'][0])
-                        st.metric("Number of Features", insights['shape'][1])
-                        st.metric("Average Text Length", f"{insights['avg_text_length']:.1f}")
-                        st.subheader("🎯 Class Distribution")
-                        class_dist_df = pd.DataFrame(list(insights['class_distribution'].items()),
-                                                   columns=['Class', 'Count'])
-                        st.dataframe(class_dist_df, use_container_width=True)
-                    with col2:
-                        st.subheader("📋 Data Preview")
-                        preview_df = train_df[[text_col, target_col]].head()
-                        st.dataframe(preview_df, use_container_width=True)
-                        st.subheader("🧹 Cleaned Text Preview")
-                        cleaned_preview = train_df[['clean_text', target_col]].head()
-                        st.dataframe(cleaned_preview, use_container_width=True)
-                    st.subheader("📊 Visualizations")
-                    create_visualizations(train_df, text_col, target_col)
-                # Train Model Tab
-                with tab2:
-                    st.header("🤖 Train Model")
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.subheader("🔧 Model Selection")
-                        model_name = st.selectbox(
-                            "Choose a model:",
-                            ["Logistic Regression", "Decision Tree", "Random Forest",
-                             "Linear SVC", "SVC", "Multinomial Naive Bayes", "Gaussian Naive Bayes"]
-                        )
-                    with col2:
-                        st.subheader("📊 Vectorizer Selection")
-                        vectorizer_type = st.selectbox(
-                            "Choose vectorizer:",
-                            ["TF-IDF Vectorizer", "Count Vectorizer"]
                         )
-                    # Training parameters
-                    st.subheader("⚙️ Training Parameters")
-                    col3, col4 = st.columns(2)
-                    with col3:
-                        test_size = st.slider("Test size", 0.1, 0.5, 0.2, 0.05)
-                        max_features = st.number_input("Max features", 1000, 20000, 10000, 1000)
-                    if st.button("🚀 Train Model", type="primary"):
-                        try:
-                            with st.spinner("Training model... This may take a few minutes."):
-                                # Initialize vectorizer
-                                if vectorizer_type == "TF-IDF Vectorizer":
-                                    vectorizer = TfidfVectorizer(max_features=max_features, stop_words='english')
-                                    st.session_state.vectorizer_type = 'tfidf'
-                                else:
-                                    vectorizer = CountVectorizer(max_features=max_features, stop_words='english')
-                                    st.session_state.vectorizer_type = 'count'
-                                # Vectorize text
-                                X = vectorizer.fit_transform(train_df['clean_text'])
-                                y = train_df['encoded_target']
-                                # Split data
-                                X_train, X_test, y_train, y_test = train_test_split(
-                                    X, y, test_size=test_size, random_state=42, stratify=y
-                                )
-                                # Train model
-                                model, accuracy, y_pred = train_model(X_train, X_test, y_train, y_test, model_name)
-                                # Store in session state
-                                st.session_state.trained_model = model
-                                st.session_state.vectorizer = vectorizer
-                                # Display results
-                                st.success("🎉 Model training completed!")
-                                col5, col6 = st.columns(2)
-                                with col5:
-                                    st.metric("🎯 Accuracy", f"{accuracy:.4f}")
-                                    st.metric("🏋️ Training Samples", len(X_train))
-                                    st.metric("🧪 Test Samples", len(X_test))
-                                with col6:
-                                    st.subheader("📊 Classification Report")
-                                    report = classification_report(y_test, y_pred,
-                                                                 target_names=label_encoder.classes_,
-                                                                 output_dict=True)
-                                    report_df = pd.DataFrame(report).transpose()
-                                    st.dataframe(report_df.round(3), use_container_width=True)
-                        except Exception as e:
-                            st.error(f"❌ Error during training: {str(e)}")
-                # Predictions Tab
-                with tab3:
-                    st.header("🔍 Make Predictions")
-                    if st.session_state.trained_model is not None:
-                        # Single prediction
-                        st.subheader("📝 Single Text Prediction")
-                        user_input = st.text_area("Enter text to classify:", height=100)
-                        if st.button("🔮 Predict", type="primary"):
-                            if user_input.strip():
-                                try:
-                                    with st.spinner("Making prediction..."):
-                                        # Clean and vectorize input
-                                        text_cleaner = TextCleaner()
-                                        clean_input = text_cleaner.clean_text(user_input)
-                                        input_vector = st.session_state.vectorizer.transform([clean_input])
-                                        # Handle sparse matrix for Gaussian NB
-                                        if isinstance(st.session_state.trained_model, GaussianNB):
-                                            input_vector = input_vector.toarray()
-                                        # Make prediction
-                                        prediction = st.session_state.trained_model.predict(input_vector)[0]
-                                        predicted_label = st.session_state.label_encoder.inverse_transform([prediction])[0]
-                                        # Get probabilities if available
-                                        if hasattr(st.session_state.trained_model, 'predict_proba'):
-                                            try:
-                                                proba = st.session_state.trained_model.predict_proba(input_vector)[0]
-                                                st.success("🎉 Prediction completed!")
-                                                st.write(f"**Input:** {user_input}")
-                                                st.write(f"**Predicted Class:** {predicted_label}")
-                                                # Show probabilities
-                                                st.subheader("📊 Class Probabilities")
-                                                prob_df = pd.DataFrame({
-                                                    'Class': st.session_state.label_encoder.classes_,
-                                                    'Probability': proba
-                                                }).sort_values('Probability', ascending=False)
-                                                st.bar_chart(prob_df.set_index('Class'))
-                                                st.dataframe(prob_df.round(4), use_container_width=True)
-                                            except:
-                                                st.success("🎉 Prediction completed!")
-                                                st.write(f"**Predicted Class:** {predicted_label}")
-                                        else:
-                                            st.success("🎉 Prediction completed!")
-                                            st.write(f"**Predicted Class:** {predicted_label}")
-                                except Exception as e:
-                                    st.error(f"❌ Error during prediction: {str(e)}")
-                            else:
-                                st.warning("⚠️ Please enter some text to classify")
-                        # Batch predictions
-                        st.subheader("📊 Batch Predictions")
-                        batch_file = st.file_uploader("Upload CSV for batch predictions", type=["csv"])
-                        if batch_file is not None:
-                            try:
-                                batch_df = safe_file_read(batch_file)
-                                if batch_df is not None:
-                                    st.write("**Preview:**")
-                                    st.dataframe(batch_df.head(), use_container_width=True)
-                                    batch_text_col = st.selectbox("Select text column for prediction:",
-                                                                 batch_df.columns.tolist())
-                                    if st.button("🚀 Run Batch Predictions"):
-                                        with st.spinner("Processing batch predictions..."):
-                                            text_cleaner = TextCleaner()
-                                            predictions = []
-                                            for text in batch_df[batch_text_col]:
-                                                try:
-                                                    clean_text = text_cleaner.clean_text(str(text))
-                                                    text_vector = st.session_state.vectorizer.transform([clean_text])
-                                                    if isinstance(st.session_state.trained_model, GaussianNB):
-                                                        text_vector = text_vector.toarray()
-                                                    pred = st.session_state.trained_model.predict(text_vector)[0]
-                                                    pred_label = st.session_state.label_encoder.inverse_transform([pred])[0]
-                                                    predictions.append(pred_label)
-                                                except:
-                                                    predictions.append("Error")
-                                            batch_df['Predicted_Class'] = predictions
-                                            st.success("🎉 Batch predictions completed!")
-                                            st.dataframe(batch_df, use_container_width=True)
-                                            # Download results
-                                            csv_data = batch_df.to_csv(index=False)
-                                            st.download_button(
-                                                label="📥 Download Results",
-                                                data=csv_data,
-                                                file_name="batch_predictions.csv",
-                                                mime="text/csv"
-                                            )
-                            except Exception as e:
-                                st.error(f"❌ Error processing batch file: {str(e)}")
-                    else:
-                        st.warning("⚠️ No trained model found. Please train a model first in the 'Train Model' tab.")
-            else:
-                st.warning("⚠️ Please select different columns for text and target.")
-    except Exception as e:
-        st.error(f"❌ Error loading file: {str(e)}")
-        st.info("💡 Try these solutions:")
-        st.write("- Check if the file is a valid CSV")
-        st.write("- Ensure the file is not corrupted")
-        st.write("- Try saving the file with UTF-8 encoding")
-else:
-    st.info("👆 Please upload a CSV file to get started")
-    # Show example data format
-    st.subheader("📋 Expected Data Format")
-    example_df = pd.DataFrame({
-        'text': [
-            "This product is amazing! I love it.",
-            "Terrible quality, waste of money.",
-            "Good value for the price.",
-            "Not what I expected, disappointed."
-        ],
-        'sentiment': ['positive', 'negative', 'positive', 'negative']
-    })
-    st.dataframe(example_df, use_container_width=True)
-# Footer
-st.markdown("---")
-st.markdown("Built with ❤️ using Streamlit | No Code Text Classification App")

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
+from NoCodeTextClassifier.EDA import Informations, Visualizations
 from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
+from NoCodeTextClassifier.preprocessing import process, TextCleaner, Vectorization
+from NoCodeTextClassifier.models import Models
 import os
 import pickle
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+# Utility functions
+def save_artifacts(obj, folder_name, file_name):
+    """Pickle ``obj`` into ``folder_name/file_name``, creating the folder when needed."""
+    # exist_ok=True keeps repeated saves into the same folder from raising.
+    os.makedirs(folder_name, exist_ok=True)
+    destination = os.path.join(folder_name, file_name)
+    with open(destination, 'wb') as sink:
+        pickle.dump(obj, sink)
+def load_artifacts(folder_name, file_name):
+    """Unpickle and return ``folder_name/file_name``; show an error and return None if it is missing."""
+    artifact_path = os.path.join(folder_name, file_name)
+    artifact = None
+    try:
+        with open(artifact_path, 'rb') as source:
+            artifact = pickle.load(source)
+    except FileNotFoundError:
+        # Reported in the UI instead of raised; callers check for None.
+        st.error(f"File {file_name} not found in {folder_name} folder")
+    return artifact
+def load_model(model_name):
+    """Unpickle and return ``models/<model_name>``; show an error and return None if it is missing."""
+    model_path = os.path.join('models', model_name)
+    loaded_model = None
+    try:
+        with open(model_path, 'rb') as source:
+            loaded_model = pickle.load(source)
+    except FileNotFoundError:
+        # Reported in the UI instead of raised; callers check for None.
+        st.error(f"Model {model_name} not found. Please train a model first.")
+    return loaded_model
+def predict_text(model_name, text, vectorizer_type="tfidf"):
+    """Classify one piece of text with a previously trained, pickled model.
+    The model is read from ``models/<model_name>``; the fitted vectorizer
+    (``<vectorizer_type>_vectorizer.pkl``) and the label encoder
+    (``encoder.pkl``) are read from the ``artifacts`` folder.
+    Args:
+        model_name: File name of the pickled model inside ``models``.
+        text: Raw text to classify; it is cleaned with ``TextCleaner`` first.
+        vectorizer_type: Prefix of the vectorizer artifact to load
+            (the training section writes "tfidf" or "count").
+    Returns:
+        ``(predicted_label, prediction_proba)``. ``prediction_proba`` is
+        ``None`` when the model has no usable ``predict_proba``. On any
+        failure the error is shown with ``st.error`` and ``(None, None)``
+        is returned.
+    """
+    try:
+        # Load model
+        model = load_model(model_name)
+        if model is None:
+            return None, None
+        # Load vectorizer
+        vectorizer_file = f"{vectorizer_type}_vectorizer.pkl"
+        vectorizer = load_artifacts("artifacts", vectorizer_file)
+        if vectorizer is None:
+            return None, None
+        # Load label encoder
+        encoder = load_artifacts("artifacts", "encoder.pkl")
+        if encoder is None:
+            return None, None
+        # Clean and vectorize text
+        text_cleaner = TextCleaner()
+        clean_text = text_cleaner.clean_text(text)
+        # Transform text using the same vectorizer used during training
+        text_vector = vectorizer.transform([clean_text])
+        # Make prediction
+        try:
+            prediction = model.predict(text_vector)
+        except TypeError:
+            # Dense-only estimators (e.g. scikit-learn's GaussianNB) reject the
+            # sparse matrix a text vectorizer produces. Retry once with a dense
+            # copy and keep using it for predict_proba below.
+            if not hasattr(text_vector, 'toarray'):
+                raise
+            text_vector = text_vector.toarray()
+            prediction = model.predict(text_vector)
+        # Probabilities are optional: a failure here must not discard the
+        # label that was already predicted.
+        prediction_proba = None
+        if hasattr(model, 'predict_proba'):
+            try:
+                prediction_proba = model.predict_proba(text_vector)[0]
+            except Exception:
+                prediction_proba = None
+        # Decode prediction
+        predicted_label = encoder.inverse_transform(prediction)[0]
+        return predicted_label, prediction_proba
+    except Exception as e:
+        st.error(f"Error during prediction: {str(e)}")
+        return None, None
+# Streamlit App
+# Page header, sidebar controls and training-data loading. The names bound
+# here (section, train_data, train_df, info, text_data, target) are read by
+# the section code further down.
+st.title('No Code Text Classification App')
+st.write('Understand the behavior of your text data and train a model to classify the text data')
+# Sidebar: the radio decides which of the three sections is rendered
+section = st.sidebar.radio("Choose Section", ["Data Analysis", "Train Model", "Predictions"])
+# Upload Data
+st.sidebar.subheader("Upload Your Dataset")
+train_data = st.sidebar.file_uploader("Upload training data", type=["csv"])
+test_data = st.sidebar.file_uploader("Upload test data (optional)", type=["csv"])
+# Default vectorizer choice, kept in session state so the Predictions section
+# can read back what the Train Model section selected
+if 'vectorizer_type' not in st.session_state:
+    st.session_state.vectorizer_type = "tfidf"
+# When no file is uploaded, train_df and info are never assigned; the section
+# code tests `train_data is not None` before touching them.
+if train_data is not None:
     try:
+        # NOTE(review): latin1 maps every byte to a character, so decoding
+        # cannot fail, but a UTF-8 file with non-ASCII text would be read as
+        # mis-decoded characters — confirm this trade-off is intended.
+        train_df = pd.read_csv(train_data, encoding='latin1')
+        # NOTE(review): test_df is assigned here but not referenced again in
+        # the visible code.
+        if test_data is not None:
+            test_df = pd.read_csv(test_data, encoding='latin1')
+        else:
+            test_df = None
+        st.write("Training Data Preview:")
+        st.write(train_df.head(3))
+        columns = train_df.columns.tolist()
+        # Both selectors offer the same column list; nothing here prevents the
+        # same column from being chosen as text and target.
+        text_data = st.sidebar.selectbox("Choose the text column:", columns)
+        target = st.sidebar.selectbox("Choose the target column:", columns)
+        # Process data
+        # presumably clean_text() / text_length() return one value per row of
+        # train_df — confirm in NoCodeTextClassifier.EDA
+        info = Informations(train_df, text_data, target)
+        train_df['clean_text'] = info.clean_text()
+        train_df['text_length'] = info.text_length()
+        # Handle label encoding manually if the class doesn't store encoder
+        from sklearn.preprocessing import LabelEncoder
+        label_encoder = LabelEncoder()
+        # The integer codes are stored in a new 'target' column; the original
+        # label column stays under its own name (held in the `target` variable).
+        train_df['target'] = label_encoder.fit_transform(train_df[target])
+        # Save label encoder for later use
+        # (save_artifacts also creates the folder, so this makedirs is redundant)
+        os.makedirs("artifacts", exist_ok=True)
+        save_artifacts(label_encoder, "artifacts", "encoder.pkl")
+    except Exception as e:
+        st.error(f"Error loading data: {str(e)}")
+        # Reset to None so the `train_df is not None` guards in the sections
+        # below skip their work after a failed load.
+        train_df = None
+        info = None
+# Section dispatch: exactly one of the three branches below runs, chosen by
+# the sidebar radio stored in `section`.
+# Data Analysis Section
+if section == "Data Analysis":
+    # train_data is tested first so train_df (only assigned after an upload)
+    # is never read when no file was provided.
+    if train_data is not None and train_df is not None:
         try:
+            st.subheader("Get Insights from the Data")
+            st.write("Data Shape:", info.shape())
+            st.write("Class Imbalance:", info.class_imbalanced())
+            st.write("Missing Values:", info.missing_values())
+            st.write("Processed Data Preview:")
+            st.write(train_df[['clean_text', 'text_length', 'target']].head(3))
+            st.markdown("**Text Length Analysis**")
+            st.write(info.analysis_text_length('text_length'))
+            # Calculate correlation manually since we handled encoding separately
+            # .corr() on two columns gives a 2x2 matrix; iloc[0, 1] is the
+            # text_length/target entry.
+            # NOTE(review): 'target' holds LabelEncoder integer codes, so with
+            # more than two classes this value depends on how the classes
+            # happen to be numbered.
+            correlation = train_df[['text_length', 'target']].corr().iloc[0, 1]
+            st.write(f"Correlation between Text Length and Target: {correlation:.4f}")
+            st.subheader("Visualizations")
+            vis = Visualizations(train_df, text_data, target)
+            vis.class_distribution()
+            vis.text_length_distribution()
+        except Exception as e:
+            st.error(f"Error in data analysis: {str(e)}")
+    else:
+        st.warning("Please upload training data to get insights")
+# Train Model Section
+elif section == "Train Model":
+    if train_data is not None and train_df is not None:
+        try:
+            st.subheader("Train a Model")
+            # Create two columns for model selection
+            col1, col2 = st.columns(2)
+            with col1:
+                model = st.radio("Choose the Model", [
+                    "Logistic Regression", "Decision Tree",
+                    "Random Forest", "Linear SVC", "SVC",
+                    "Multinomial Naive Bayes", "Gaussian Naive Bayes"
+                ])
+            with col2:
+                vectorizer_choice = st.radio("Choose Vectorizer", ["Tfidf Vectorizer", "Count Vectorizer"])
+            # Initialize vectorizer
+            # The choice is also recorded in session state; the Predictions
+            # section uses it to pick which vectorizer artifact to load.
+            if vectorizer_choice == "Tfidf Vectorizer":
+                vectorizer = TfidfVectorizer(max_features=10000)
+                st.session_state.vectorizer_type = "tfidf"
+            else:
+                vectorizer = CountVectorizer(max_features=10000)
+                st.session_state.vectorizer_type = "count"
+            st.write("Training Data Preview:")
+            st.write(train_df[['clean_text', 'target']].head(3))
+            # Vectorize text data
+            # NOTE(review): everything from here to the "Start Training" button
+            # (fit, split, vectorizer save) runs whenever this section is
+            # executed, not only when the button is pressed.
+            # NOTE(review): the vectorizer is fitted on all rows before the
+            # split, so the held-out rows also shape the fitted vocabulary.
+            X = vectorizer.fit_transform(train_df['clean_text'])
+            y = train_df['target']
+            # Split data
+            # presumably a train/test split with library-chosen defaults —
+            # confirm in NoCodeTextClassifier.preprocessing
+            X_train, X_test, y_train, y_test = process.split_data(X, y)
+            st.write(f"Data split - Train: {X_train.shape}, Test: {X_test.shape}")
+            # Save vectorizer for later use
+            # The file name carries the vectorizer type, so tfidf and count
+            # artifacts do not overwrite each other.
+            vectorizer_filename = f"{st.session_state.vectorizer_type}_vectorizer.pkl"
+            save_artifacts(vectorizer, "artifacts", vectorizer_filename)
+            if st.button("Start Training"):
+                with st.spinner("Training model..."):
+                    models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
+                    # Train selected model
+                    # presumably each Models method fits the estimator and
+                    # pickles it under models/, since the Predictions section
+                    # lists *.pkl files there — confirm in
+                    # NoCodeTextClassifier.models
+                    if model == "Logistic Regression":
+                        models.LogisticRegression()
+                    elif model == "Decision Tree":
+                        models.DecisionTree()
+                    elif model == "Linear SVC":
+                        models.LinearSVC()
+                    elif model == "SVC":
+                        models.SVC()
+                    elif model == "Multinomial Naive Bayes":
+                        models.MultinomialNB()
+                    elif model == "Random Forest":
+                        models.RandomForestClassifier()
+                    elif model == "Gaussian Naive Bayes":
+                        models.GaussianNB()
+                st.success("Model training completed!")
+                st.info("You can now use the 'Predictions' section to classify new text.")
+        except Exception as e:
+            st.error(f"Error in model training: {str(e)}")
+    else:
+        st.warning("Please upload training data to train a model")
+# Predictions Section
+# Works from files on disk (models/ and artifacts/), so it does not need
+# training data to be uploaded in this run.
+elif section == "Predictions":
+    st.subheader("Perform Predictions on New Text")
+    # Check if models exist
+    if os.path.exists("models") and os.listdir("models"):
+        # Text input for prediction
+        text_input = st.text_area("Enter the text to classify:", height=100)
+        # Model selection
+        # Only *.pkl files are offered; the folder may hold other files, hence
+        # the second emptiness check below.
+        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+        if available_models:
+            selected_model = st.selectbox("Choose the trained model:", available_models)
+            # Prediction button
+            if st.button("Predict", key="single_predict"):
+                if text_input.strip():
+                    with st.spinner("Making prediction..."):
+                        # NOTE(review): the vectorizer type comes from session
+                        # state (default 'tfidf'), not from the selected model
+                        # file, so it may differ from the vectorizer that model
+                        # was trained with.
+                        predicted_label, prediction_proba = predict_text(
+                            selected_model,
+                            text_input,
+                            st.session_state.get('vectorizer_type', 'tfidf')
                         )
+                        if predicted_label is not None:
+                            st.success("Prediction completed!")
+                            # Display results
+                            st.markdown("### Prediction Results")
+                            st.markdown(f"**Input Text:** {text_input}")
+                            st.markdown(f"**Predicted Class:** {predicted_label}")
+                            # Display probabilities if available
+                            if prediction_proba is not None:
+                                st.markdown("**Class Probabilities:**")
+                                # Load encoder to get class names
+                                encoder = load_artifacts("artifacts", "encoder.pkl")
+                                if encoder is not None:
+                                    classes = encoder.classes_
+                                    # Highest probability first, for both the
+                                    # chart and the table
+                                    prob_df = pd.DataFrame({
+                                        'Class': classes,
+                                        'Probability': prediction_proba
+                                    }).sort_values('Probability', ascending=False)
+                                    st.bar_chart(prob_df.set_index('Class'))
+                                    st.dataframe(prob_df)
+                else:
+                    st.warning("Please enter some text to classify")
+        else:
+            st.warning("No trained models found. Please train a model first.")
+    else:
+        st.warning("No trained models found. Please go to 'Train Model' section to train a model first.")
+    # Option to classify multiple texts
+    st.markdown("---")
+    st.subheader("Batch Predictions")
+    uploaded_file = st.file_uploader("Upload a CSV file with text to classify", type=['csv'])
+    if uploaded_file is not None:
+        try:
+            batch_df = pd.read_csv(uploaded_file, encoding='latin1')
+            st.write("Uploaded data preview:")
+            st.write(batch_df.head())
+            # Select text column
+            text_column = st.selectbox("Select the text column:", batch_df.columns.tolist())
+            # NOTE(review): there is no else branch here, so when models/ is
+            # missing or empty nothing further is shown for the uploaded file.
+            if os.path.exists("models") and os.listdir("models"):
+                available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+                batch_model = st.selectbox("Choose model for batch prediction:", available_models, key="batch_model")
+                if st.button("Run Batch Predictions", key="batch_predict"):
+                    with st.spinner("Processing batch predictions..."):
+                        predictions = []
+                        # predict_text is called once per row and reloads the
+                        # model, vectorizer and encoder from disk on each call.
+                        for text in batch_df[text_column]:
+                            pred, _ = predict_text(
+                                batch_model,
+                                str(text),
+                                st.session_state.get('vectorizer_type', 'tfidf')
+                            )
+                            # A failed row is recorded as the string "Error"
+                            # so the output keeps one entry per input row.
+                            predictions.append(pred if pred is not None else "Error")
+                        batch_df['Predicted_Class'] = predictions
+                        st.success("Batch predictions completed!")
+                        st.write("Results:")
+                        st.write(batch_df[[text_column, 'Predicted_Class']])
+                        # Download results
+                        csv = batch_df.to_csv(index=False)
+                        st.download_button(
+                            label="Download predictions as CSV",
+                            data=csv,
+                            file_name="batch_predictions.csv",
+                            mime="text/csv"
+                        )
+        except Exception as e:
+            st.error(f"Error in batch prediction: {str(e)}")