d-e-e-k-11 commited on
Commit
bde793d
·
verified ·
1 Parent(s): 715e451

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ BP_MHS_V1.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ env/
7
+ venv/
8
+ .env
9
+ .venv
10
+ pip-log.txt
11
+ pip-delete-this-directory.txt
12
+ .vscode/
13
+ .idea/
14
+ *.log
BP_MHS_V1.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fdb898ed9e08da458034cb0056c9eda0eda044791f4368ed5c5f79a633ce84a
3
+ size 21751232
README.md CHANGED
@@ -1,3 +1,35 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text-Based Chatbot Performance Analysis
2
+
3
+ This project implements an end-to-end performance analysis system for text-based chatbots using Machine Learning. It features a Bidirectional LSTM model with an Attention mechanism to evaluate chatbot responses based on provided context and facts.
4
+
5
+ ## Features
6
+ - **Exploratory Data Analysis**: Visual insights into engine distribution and performance correlations.
7
+ - **Advanced LSTM Model**: Uses Bidirectional LSTM and Attention layers for high-accuracy evaluation.
8
+ - **Context-Aware Prediction**: Evaluates responses not just on linguistics but also on factual consistency.
9
+ - **Modern Web Interface**: Glassmorphic UI with real-time performance analytics.
10
+ - **Flask Backend**: Robust API for model inference.
11
+
12
+ ## Project Structure
13
+ - `train_model.py`: Training pipeline for the advanced model.
14
+ - `app.py`: Flask server for real-time predictions.
15
+ - `explore_data.py`: EDA script for dataset visualization.
16
+ - `BP_MHS_V1.csv`: The core dataset.
17
+ - `templates/` & `static/`: Frontend assets.
18
+
19
+ ## How to Run
20
+ 1. Install dependencies:
21
+ ```bash
22
+ pip install -r requirements.txt
23
+ ```
24
+ 2. Train the model:
25
+ ```bash
26
+ python train_model.py
27
+ ```
28
+ 3. Start the application:
29
+ ```bash
30
+ python app.py
31
+ ```
32
+ 4. Access the UI at `http://127.0.0.1:5000`.
33
+
34
+ ## Model Insights
35
+ The system uses an Attention mechanism to focus on critical parts of the facts and responses, ensuring the expert verdict is both accurate and contextually relevant.
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import numpy as np
import os

app = Flask(__name__)
CORS(app)

# Load model and tokenizer
# Both globals stay None until load_resources() finds the trained artifacts
# (chatbot_performance_advanced.h5 / tokenizer_advanced.pickle) on disk.
model = None
tokenizer = None
# Padding length for input sequences; must match the max_len the tokenizer
# and model were trained with in train_model.py.
MAX_LEN = 300
# Registry for custom layers
def load_resources():
    """Load the trained model and tokenizer from disk into module globals.

    Returns:
        True when both artifact files exist and were loaded successfully,
        False otherwise (e.g. while training has not yet produced them).
    """
    global model, tokenizer
    model_path = 'chatbot_performance_advanced.h5'
    tokenizer_path = 'tokenizer_advanced.pickle'

    # Custom Attention Layer registration for loading
    # Must mirror the Attention layer defined in train_model.py so Keras can
    # deserialize the saved architecture via custom_objects below.
    class Attention(tf.keras.layers.Layer):
        def __init__(self, **kwargs):
            super(Attention, self).__init__(**kwargs)
        def build(self, input_shape):
            # One score weight per feature dimension, one bias per timestep.
            self.W = self.add_weight(name='attention_weight', shape=(input_shape[-1], 1), initializer='random_normal', trainable=True)
            self.b = self.add_weight(name='attention_bias', shape=(input_shape[1], 1), initializer='zeros', trainable=True)
            super(Attention, self).build(input_shape)
        def call(self, x):
            # Additive attention: score each timestep, softmax over the time
            # axis, then return the attention-weighted sum of the sequence.
            e = tf.keras.backend.tanh(tf.keras.backend.dot(x, self.W) + self.b)
            a = tf.keras.backend.softmax(e, axis=1)
            output = x * a
            return tf.keras.backend.sum(output, axis=1)

    if os.path.exists(model_path) and os.path.exists(tokenizer_path):
        model = tf.keras.models.load_model(model_path, custom_objects={'Attention': Attention})
        with open(tokenizer_path, 'rb') as handle:
            tokenizer = pickle.load(handle)
        print("Advanced Model and Tokenizer loaded successfully.")
        return True
    return False
@app.route('/')
def index():
    """Serve the single-page frontend (templates/index.html)."""
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    """Score a chatbot response against its question and supporting facts.

    Expects a JSON body with 'facts', 'question' and 'response' keys; a body
    of {"ping": true} is answered with a readiness status instead. Returns
    the sigmoid probability and a boolean 'is_best' verdict, 503 while the
    model artifacts are not yet available, or 500 on inference errors.
    """
    global model, tokenizer
    # Lazy-load the artifacts on first use (training may still be running).
    if model is None or tokenizer is None:
        if not load_resources():
            return jsonify({
                'error': 'Model is still training. Please wait a few minutes.',
                'status': 'training'
            }), 503

    # BUG FIX: request.json is None (or raises) for a missing/non-JSON body,
    # which previously crashed the .get() calls below with an unhandled
    # AttributeError outside the try block. get_json(silent=True) degrades
    # gracefully to an empty payload instead.
    data = request.get_json(silent=True) or {}
    if data.get('ping'):
        return jsonify({'status': 'ready'})

    facts = data.get('facts', 'No context provided.')
    question = data.get('question', '')
    response = data.get('response', '')

    try:
        # Preprocess text with facts context — same "[FACTS]/[QUERY]/[RES]"
        # template and lowercasing used at training time (train_model.py).
        text = f"[FACTS] {facts} [QUERY] {question} [RES] {response}".lower()
        seq = tokenizer.texts_to_sequences([text])
        pad = pad_sequences(seq, maxlen=MAX_LEN)

        # Prediction: single sigmoid output; > 0.5 counts as a "best" response.
        prediction = model.predict(pad)[0][0]
        is_best = bool(prediction > 0.5)

        return jsonify({
            'probability': float(prediction),
            'is_best': is_best
        })
    except Exception as e:
        # Surface inference failures to the client rather than a bare 500 page.
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Warm the model/tokenizer before serving so the first request is fast;
    # predict() still falls back to lazy loading if the artifacts are missing.
    load_resources()
    app.run(debug=True, port=5000)
best_distribution.png ADDED
chatbot_performance_advanced.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f766823513c756c3a9cec1327e3b3fffcd9058d12078c29e58dd4a91c20b00
3
+ size 24397072
chatbot_performance_lstm.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da77b22c6b79bbddb794324eaf616ce9e7ee7d8fb0be287909791cb8e76377a0
3
+ size 4220752
engine_distribution.png ADDED
explore_data.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
def explore_data(file_path):
    """Run a quick exploratory-analysis pass over the chatbot dataset.

    Prints shape, columns, missing-value counts and the 'best' label
    distribution, and writes three figures to the working directory:
    engine_distribution.png, best_distribution.png, p_correlation.png.

    Args:
        file_path: Path to the CSV dataset. Assumes 'engine', 'best' and
            'p1'..'p10' columns exist — TODO confirm against the schema.
    """
    df = pd.read_csv(file_path)
    print("Dataset Shape:", df.shape)
    print("\nColumns:", df.columns.tolist())

    # Check for missing values
    print("\nMissing Values:\n", df.isnull().sum())

    # Engine distribution
    plt.figure(figsize=(10, 6))
    sns.countplot(x='engine', data=df)
    plt.title('Distribution of Chatbot Engines')
    plt.savefig('engine_distribution.png')
    # BUG FIX: figures were never closed, so repeated calls accumulated open
    # matplotlib figures (memory leak + "too many open figures" warning).
    plt.close()

    # Performance distribution (Best/Worst)
    plt.figure(figsize=(10, 6))
    df['best'].value_counts().plot(kind='bar')
    plt.title('Distribution of "Best" Label')
    plt.savefig('best_distribution.png')
    plt.close()

    # p1-p10 correlation
    p_cols = [f'p{i}' for i in range(1, 11)]
    plt.figure(figsize=(12, 10))
    sns.heatmap(df[p_cols].astype(int).corr(), annot=True, cmap='coolwarm')
    plt.title('Correlation between Evaluation Parameters (p1-p10)')
    plt.savefig('p_correlation.png')
    plt.close()

    print("\nTarget Variable 'best' counts:")
    print(df['best'].value_counts())
if __name__ == "__main__":
    # Run the EDA against the core dataset shipped with the repo.
    explore_data('BP_MHS_V1.csv')
p_correlation.png ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ tensorflow
4
+ scikit-learn
5
+ matplotlib
6
+ seaborn
7
+ flask
8
+ flask-cors
static/css/style.css ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary-color: #6366f1;
3
+ --primary-hover: #4f46e5;
4
+ --bg-dark: #0f172a;
5
+ --glass-bg: rgba(255, 255, 255, 0.05);
6
+ --glass-border: rgba(255, 255, 255, 0.1);
7
+ --text-main: #f8fafc;
8
+ --text-muted: #94a3b8;
9
+ --accent: #f43f5e;
10
+ --success: #10b981;
11
+ }
12
+
13
+ * {
14
+ margin: 0;
15
+ padding: 0;
16
+ box-sizing: border-box;
17
+ font-family: 'Inter', system-ui, -apple-system, sans-serif;
18
+ }
19
+
20
+ body {
21
+ background-color: var(--bg-dark);
22
+ color: var(--text-main);
23
+ min-height: 100vh;
24
+ display: flex;
25
+ justify-content: center;
26
+ align-items: center;
27
+ background-image: radial-gradient(circle at 50% 50%, #1e293b 0%, #0f172a 100%);
28
+ }
29
+
30
+ .container {
31
+ width: 90%;
32
+ max-width: 800px;
33
+ padding: 2rem;
34
+ background: var(--glass-bg);
35
+ backdrop-filter: blur(12px);
36
+ border: 1px solid var(--glass-border);
37
+ border-radius: 24px;
38
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
39
+ animation: fadeIn 0.8s ease-out;
40
+ }
41
+
42
+ @keyframes fadeIn {
43
+ from {
44
+ opacity: 0;
45
+ transform: translateY(20px);
46
+ }
47
+
48
+ to {
49
+ opacity: 1;
50
+ transform: translateY(0);
51
+ }
52
+ }
53
+
54
+ h1 {
55
+ font-size: 2.5rem;
56
+ margin-bottom: 0.5rem;
57
+ background: linear-gradient(to right, #818cf8, #f472b6);
58
+ -webkit-background-clip: text;
59
+ -webkit-text-fill-color: transparent;
60
+ text-align: center;
61
+ }
62
+
63
+ p.subtitle {
64
+ color: var(--text-muted);
65
+ text-align: center;
66
+ margin-bottom: 2.5rem;
67
+ }
68
+
69
+ .form-group {
70
+ margin-bottom: 1.5rem;
71
+ }
72
+
73
+ label {
74
+ display: block;
75
+ margin-bottom: 0.5rem;
76
+ font-weight: 500;
77
+ color: var(--text-main);
78
+ }
79
+
80
+ textarea,
81
+ input {
82
+ width: 100%;
83
+ padding: 1rem;
84
+ background: rgba(0, 0, 0, 0.2);
85
+ border: 1px solid var(--glass-border);
86
+ border-radius: 12px;
87
+ color: white;
88
+ font-size: 1rem;
89
+ transition: all 0.3s ease;
90
+ }
91
+
92
+ textarea:focus,
93
+ input:focus {
94
+ outline: none;
95
+ border-color: var(--primary-color);
96
+ box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2);
97
+ }
98
+
99
+ button {
100
+ width: 100%;
101
+ padding: 1rem;
102
+ background: linear-gradient(135deg, var(--primary-color), var(--primary-hover));
103
+ border: none;
104
+ border-radius: 12px;
105
+ color: white;
106
+ font-weight: 600;
107
+ font-size: 1.1rem;
108
+ cursor: pointer;
109
+ transition: transform 0.2s, box-shadow 0.2s;
110
+ margin-top: 1rem;
111
+ }
112
+
113
+ button:hover {
114
+ transform: translateY(-2px);
115
+ box-shadow: 0 4px 15px rgba(99, 102, 241, 0.4);
116
+ }
117
+
118
+ button:active {
119
+ transform: translateY(0);
120
+ }
121
+
122
+ .result-container {
123
+ margin-top: 2rem;
124
+ padding: 1.5rem;
125
+ border-radius: 16px;
126
+ background: rgba(255, 255, 255, 0.03);
127
+ border: 1px dashed var(--glass-border);
128
+ display: none;
129
+ }
130
+
131
+ .result-header {
132
+ font-weight: 600;
133
+ margin-bottom: 0.5rem;
134
+ }
135
+
136
+ .score-badge {
137
+ display: inline-block;
138
+ padding: 0.5rem 1rem;
139
+ border-radius: 99px;
140
+ font-weight: 700;
141
+ margin-top: 0.5rem;
142
+ }
143
+
144
+ .score-good {
145
+ background: rgba(16, 185, 129, 0.2);
146
+ color: #10b981;
147
+ }
148
+
149
+ .score-bad {
150
+ background: rgba(244, 63, 94, 0.2);
151
+ color: #f43f5e;
152
+ }
153
+
154
+ .loader {
155
+ width: 24px;
156
+ height: 24px;
157
+ border: 3px solid #FFF;
158
+ border-bottom-color: transparent;
159
+ border-radius: 50%;
160
+ display: inline-block;
161
+ box-sizing: border-box;
162
+ animation: rotation 1s linear infinite;
163
+ display: none;
164
+ vertical-align: middle;
165
+ margin-right: 10px;
166
+ }
167
+
168
+ .tabs {
169
+ display: flex;
170
+ gap: 1rem;
171
+ margin-bottom: 2rem;
172
+ justify-content: center;
173
+ }
174
+
175
+ .tab {
176
+ padding: 0.75rem 1.5rem;
177
+ background: rgba(255, 255, 255, 0.05);
178
+ border-radius: 12px;
179
+ cursor: pointer;
180
+ transition: all 0.3s ease;
181
+ border: 1px solid var(--glass-border);
182
+ color: var(--text-muted);
183
+ }
184
+
185
+ .tab.active {
186
+ background: var(--primary-color);
187
+ color: white;
188
+ border-color: var(--primary-color);
189
+ }
190
+
191
+ .view {
192
+ display: none;
193
+ }
194
+
195
+ .view.active {
196
+ display: block;
197
+ }
198
+
199
+ .dashboard-grid {
200
+ display: grid;
201
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
202
+ gap: 1.5rem;
203
+ margin-top: 1rem;
204
+ }
205
+
206
+ .stat-card {
207
+ background: rgba(255, 255, 255, 0.03);
208
+ padding: 1.5rem;
209
+ border-radius: 16px;
210
+ border: 1px solid var(--glass-border);
211
+ text-align: center;
212
+ }
213
+
214
+ .stat-value {
215
+ font-size: 1.5rem;
216
+ font-weight: 700;
217
+ margin-top: 0.5rem;
218
+ color: var(--primary-color);
219
+ }
220
+
221
+ .engine-list {
222
+ margin-top: 2rem;
223
+ }
224
+
225
+ .engine-item {
226
+ display: flex;
227
+ justify-content: space-between;
228
+ padding: 1rem;
229
+ border-bottom: 1px solid var(--glass-border);
230
+ }
231
+
232
+ .engine-item:last-child {
233
+ border-bottom: none;
234
+ }
235
+
236
+ @keyframes rotation {
237
+ 0% {
238
+ transform: rotate(0deg);
239
+ }
240
+
241
+ 100% {
242
+ transform: rotate(360deg);
243
+ }
244
+ }
templates/index.html ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>AI Chatbot Performance Analyzer</title>
8
+ <link rel="stylesheet" href="/static/css/style.css">
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
10
+ </head>
11
+
12
+ <body>
13
+ <div class="container">
14
+ <h1>Advanced Chatbot Performance</h1>
15
+ <p class="subtitle">Multi-context Evaluation with Attention LSTM</p>
16
+
17
+ <div id="status-bar"
18
+ style="margin-bottom: 1rem; text-align: center; font-size: 0.85rem; color: #fbbf24; background: rgba(251, 191, 36, 0.1); padding: 0.5rem; border-radius: 8px; border: 1px solid rgba(251, 191, 36, 0.2);">
19
+ ⚠️ Model Initialization in Progress... (Epoch 1/2)
20
+ </div>
21
+
22
+ <div class="tabs">
23
+ <div class="tab active" onclick="switchTab('analyzer')">Analyzer</div>
24
+ <div class="tab" onclick="switchTab('dashboard')">Analytics Dashboard</div>
25
+ </div>
26
+
27
+ <!-- Analyzer View -->
28
+ <div id="analyzer-view" class="view active">
29
+ <div class="form-group">
30
+ <label for="facts">Related Context / Facts</label>
31
+ <textarea id="facts" rows="3" placeholder="Paste the knowledge base or facts here..."></textarea>
32
+ </div>
33
+
34
+ <div class="form-group">
35
+ <label for="question">User Question</label>
36
+ <textarea id="question" rows="2" placeholder="Enter the user question..."></textarea>
37
+ </div>
38
+
39
+ <div class="form-group">
40
+ <label for="response">Chatbot Response</label>
41
+ <textarea id="response" rows="3" placeholder="Enter the chatbot response..."></textarea>
42
+ </div>
43
+
44
+ <button id="analyze-btn">
45
+ <span class="loader" id="loader"></span>
46
+ Perform Deep Analysis
47
+ </button>
48
+
49
+ <div id="result" class="result-container">
50
+ <div class="result-header">Expert Verdict:</div>
51
+ <div id="result-text"></div>
52
+ <div id="score-badge" class="score-badge"></div>
53
+ <div id="probability" style="margin-top: 1rem; font-size: 0.9rem; color: var(--text-muted);"></div>
54
+ </div>
55
+ </div>
56
+
57
+ <!-- Dashboard View -->
58
+ <div id="dashboard-view" class="view">
59
+ <h2 style="margin-bottom: 1rem;">Dataset Insights</h2>
60
+ <div class="dashboard-grid">
61
+ <div class="stat-card">
62
+ <div>Total Queries</div>
63
+ <div class="stat-value" id="total-queries">40,152</div>
64
+ </div>
65
+ <div class="stat-card">
66
+ <div>Overall Accuracy</div>
67
+ <div class="stat-value" id="overall-quality">31.4%</div>
68
+ </div>
69
+ </div>
70
+
71
+ <div class="engine-list">
72
+ <h3 style="margin-bottom: 0.5rem;">Engine Performance Breakdown</h3>
73
+ <div class="engine-item">
74
+ <span>Openbook Performance</span>
75
+ <span style="color: var(--success)">67.3% Top Responses</span>
76
+ </div>
77
+ <div class="engine-item">
78
+ <span>Dialogflow Performance</span>
79
+ <span style="color: #6366f1">24.2% Top Responses</span>
80
+ </div>
81
+ <div class="engine-item">
82
+ <span>Watson Performance</span>
83
+ <span style="color: var(--accent)">19.3% Top Responses</span>
84
+ </div>
85
+ <div class="engine-item">
86
+ <span>Rasa Performance</span>
87
+ <span style="color: var(--accent)">14.6% Top Responses</span>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+
93
+ <script>
// Poll the backend readiness via a {ping: true} POST to /predict.
// NOTE: the original comment said "every 30 seconds" — the interval below
// actually fires every 15 seconds and stops once the banner is hidden.
async function checkStatus() {
    try {
        const res = await fetch('/predict', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ ping: true })
        });
        if (res.ok) {
            // Flip the warning banner to a green "ready" state, then hide it.
            const statusBar = document.getElementById('status-bar');
            statusBar.style.color = '#10b981';
            statusBar.style.background = 'rgba(16, 185, 129, 0.1)';
            statusBar.style.borderColor = 'rgba(16, 185, 129, 0.2)';
            statusBar.innerText = '✅ Advanced Intelligence Engine Active';
            setTimeout(() => statusBar.style.display = 'none', 5000);
        }
    } catch (e) { } // network errors are expected while the server warms up
}
checkStatus();
const statusInterval = setInterval(() => {
    const statusBar = document.getElementById('status-bar');
    if (statusBar && statusBar.style.display !== 'none') {
        checkStatus();
    } else {
        // Banner is gone (or missing) — no need to keep polling.
        clearInterval(statusInterval);
    }
}, 15000);
// Toggle between the Analyzer and Dashboard views; `tab` is either
// 'analyzer' or anything else (treated as the dashboard).
function switchTab(tab) {
    document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));

    const isAnalyzer = tab === 'analyzer';
    const tabSelector = isAnalyzer ? '.tab:nth-child(1)' : '.tab:nth-child(2)';
    const viewId = isAnalyzer ? 'analyzer-view' : 'dashboard-view';

    document.querySelector(tabSelector).classList.add('active');
    document.getElementById(viewId).classList.add('active');
}
// Submit the three text fields to /predict and render the verdict.
document.getElementById('analyze-btn').addEventListener('click', async () => {
    const facts = document.getElementById('facts').value;
    const question = document.getElementById('question').value;
    const response = document.getElementById('response').value;
    const loader = document.getElementById('loader');
    const resultDiv = document.getElementById('result');
    const resultText = document.getElementById('result-text');
    const scoreBadge = document.getElementById('score-badge');
    const probDiv = document.getElementById('probability');

    // Facts are optional; question and response are required.
    if (!question || !response) {
        alert('Please fill in the question and response.');
        return;
    }

    // Show the spinner and lock the button while the request is in flight.
    loader.style.display = 'inline-block';
    document.getElementById('analyze-btn').disabled = true;
    resultDiv.style.display = 'none';

    try {
        const res = await fetch('/predict', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ facts, question, response })
        });

        const data = await res.json();

        if (!res.ok) {
            // 503 = model still training (see app.py predict()); anything
            // else is surfaced as a generic analysis error.
            if (res.status === 503) {
                alert(data.error);
            } else {
                alert('Analysis Error: ' + (data.error || 'Server error'));
            }
            return;
        }

        // Render the verdict badge and the raw model probability.
        resultDiv.style.display = 'block';
        if (data.is_best) {
            resultText.innerText = "Advanced analysis confirms this is a high-fidelity response.";
            scoreBadge.innerText = "OPTIMIZED";
            scoreBadge.className = "score-badge score-good";
        } else {
            resultText.innerText = "Analysis suggests potential inaccuracies or linguistic flaws.";
            scoreBadge.innerText = "SUB-OPTIMAL";
            scoreBadge.className = "score-badge score-bad";
        }

        probDiv.innerText = `Attention Confidence: ${(data.probability * 100).toFixed(2)}%`;

    } catch (err) {
        alert('Analysis failed. Ensure server is running.');
    } finally {
        // Always restore the button/spinner, success or failure.
        loader.style.display = 'none';
        document.getElementById('analyze-btn').disabled = false;
    }
});
193
+ </script>
194
+ </body>
195
+
196
+ </html>
tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09022b465e7accaf02270c3afdf51c47e1ba38dba03cc84570c6bed8de942736
3
+ size 131289
tokenizer_advanced.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76331bb23bfb4785ea911ae2c1243e3d97e41d79e00753b8e639b45d85bb0d92
3
+ size 131611
train_model.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# BUG FIX: the original file repeated this entire import section twice
# (lines 1-11 duplicated verbatim as lines 13-22). Merged into a single,
# grouped block; every distinct import is preserved.
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, SpatialDropout1D, Bidirectional, Dropout, Layer, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
# Custom Attention Layer
class Attention(Layer):
    """Additive attention pooling over a (batch, time, features) sequence.

    Scores every timestep with a learned projection, softmax-normalizes the
    scores along the time axis, and collapses the sequence into a single
    attention-weighted feature vector per example.
    """

    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature dimension, one bias per timestep.
        feature_dim = input_shape[-1]
        time_steps = input_shape[1]
        self.W = self.add_weight(name='attention_weight',
                                 shape=(feature_dim, 1),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='attention_bias',
                                 shape=(time_steps, 1),
                                 initializer='zeros',
                                 trainable=True)
        super(Attention, self).build(input_shape)

    def call(self, x):
        # tanh-squashed scores -> softmax over time -> weighted sum.
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        return K.sum(x * weights, axis=1)
def train_advanced_model(file_path):
    """Train the Bi-LSTM + Attention response-quality classifier.

    Builds a combined "[FACTS] ... [QUERY] ... [RES] ..." text field from the
    CSV at *file_path*, fits and saves a tokenizer (tokenizer_advanced.pickle),
    trains with class weighting while checkpointing the best weights to
    chatbot_performance_advanced.h5, and finally reports metrics on the
    held-out test split.
    """
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Fill missing facts
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # Advanced Preprocessing: Combine facts, question, and response
    # Structure: [FACTS] facts [SEP] [QUERY] question [SEP] [RES] response
    df['text'] = "[FACTS] " + df['related_facts'].astype(str) + \
                 " [QUERY] " + df['question'].astype(str) + \
                 " [RES] " + df['engine_response'].astype(str)

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    max_words = 15000
    max_len = 300  # must match MAX_LEN in app.py at inference time

    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.15, random_state=42, stratify=y)

    # Save tokenizer immediately so it's available as soon as model starts saving checkpoints
    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")

    # Advanced Arch: Bi-LSTM + Attention
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())

    # Training with Checkpointing
    batch_size = 128
    epochs = 2
    # Up-weight the positive class to counter label imbalance.
    # BUG FIX: guard against a degenerate dataset with no positive labels,
    # which previously raised ZeroDivisionError.
    n_pos = len(y[y == 1])
    class_weight = {0: 1.0, 1: (len(y[y == 0]) / n_pos) if n_pos else 1.0}

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with Attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )

    print("Training complete.")

    # BUG FIX: the held-out test split was created but never used, even though
    # classification_report/confusion_matrix were imported for exactly this.
    # Evaluate the final in-memory weights here. NOTE(review): the checkpoint
    # on disk may hold an earlier, better epoch — reload it first if you need
    # the checkpointed model's metrics.
    y_pred = (model.predict(X_test).ravel() > 0.5).astype(int)
    print("\nTest-set classification report:")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:")
    print(confusion_matrix(y_test, y_pred))
if __name__ == "__main__":
    # Train on the core dataset shipped with the repo.
    train_advanced_model('BP_MHS_V1.csv')