d-e-e-k-11 commited on
Commit
bde793d
·
verified ·
1 Parent(s): 715e451

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ BP_MHS_V1.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ env/
7
+ venv/
8
+ .env
9
+ .venv
10
+ pip-log.txt
11
+ pip-delete-this-directory.txt
12
+ .vscode/
13
+ .idea/
14
+ *.log
BP_MHS_V1.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fdb898ed9e08da458034cb0056c9eda0eda044791f4368ed5c5f79a633ce84a
3
+ size 21751232
README.md CHANGED
@@ -1,3 +1,35 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Text-Based Chatbot Performance Analysis
2
+
3
+ This project implements an end-to-end performance analysis system for text-based chatbots using Machine Learning. It features a Bidirectional LSTM model with an Attention mechanism to evaluate chatbot responses based on provided context and facts.
4
+
5
+ ## Features
6
+ - **Exploratory Data Analysis**: Visual insights into engine distribution and performance correlations.
7
+ - **Advanced LSTM Model**: Uses Bidirectional LSTM and Attention layers for high-accuracy evaluation.
8
+ - **Context-Aware Prediction**: Evaluates responses not just on linguistics but also on factual consistency.
9
+ - **Modern Web Interface**: Glassmorphic UI with real-time performance analytics.
10
+ - **Flask Backend**: Robust API for model inference.
11
+
12
+ ## Project Structure
13
+ - `train_model.py`: Training pipeline for the advanced model.
14
+ - `app.py`: Flask server for real-time predictions.
15
+ - `explore_data.py`: EDA script for dataset visualization.
16
+ - `BP_MHS_V1.csv`: The core dataset.
17
+ - `templates/` & `static/`: Frontend assets.
18
+
19
+ ## How to Run
20
+ 1. Install dependencies:
21
+ ```bash
22
+ pip install -r requirements.txt
23
+ ```
24
+ 2. Train the model:
25
+ ```bash
26
+ python train_model.py
27
+ ```
28
+ 3. Start the application:
29
+ ```bash
30
+ python app.py
31
+ ```
32
+ 4. Access the UI at `http://127.0.0.1:5000`.
33
+
34
+ ## Model Insights
35
+ The system uses an Attention mechanism to focus on critical parts of the facts and responses, ensuring the expert verdict is both accurate and contextually relevant.
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import numpy as np
import os

app = Flask(__name__)
CORS(app)

# Load model and tokenizer
# Both globals stay None until load_resources() finds the trained artifacts
# (chatbot_performance_advanced.h5 / tokenizer_advanced.pickle) on disk.
model = None
tokenizer = None
# Padding length for input sequences; must match the max_len the tokenizer
# and model were trained with in train_model.py.
MAX_LEN = 300
# Registry for custom layers
def load_resources():
    """Load the trained model and tokenizer from disk into module globals.

    Returns:
        True when both artifact files exist and were loaded successfully,
        False otherwise (e.g. while training has not yet produced them).
    """
    global model, tokenizer
    model_path = 'chatbot_performance_advanced.h5'
    tokenizer_path = 'tokenizer_advanced.pickle'

    # Custom Attention Layer registration for loading
    # Must mirror the Attention layer defined in train_model.py so Keras can
    # deserialize the saved architecture via custom_objects below.
    class Attention(tf.keras.layers.Layer):
        def __init__(self, **kwargs):
            super(Attention, self).__init__(**kwargs)
        def build(self, input_shape):
            # One score weight per feature dimension, one bias per timestep.
            self.W = self.add_weight(name='attention_weight', shape=(input_shape[-1], 1), initializer='random_normal', trainable=True)
            self.b = self.add_weight(name='attention_bias', shape=(input_shape[1], 1), initializer='zeros', trainable=True)
            super(Attention, self).build(input_shape)
        def call(self, x):
            # Additive attention: score each timestep, softmax over the time
            # axis, then return the attention-weighted sum of the sequence.
            e = tf.keras.backend.tanh(tf.keras.backend.dot(x, self.W) + self.b)
            a = tf.keras.backend.softmax(e, axis=1)
            output = x * a
            return tf.keras.backend.sum(output, axis=1)

    if os.path.exists(model_path) and os.path.exists(tokenizer_path):
        model = tf.keras.models.load_model(model_path, custom_objects={'Attention': Attention})
        with open(tokenizer_path, 'rb') as handle:
            tokenizer = pickle.load(handle)
        print("Advanced Model and Tokenizer loaded successfully.")
        return True
    return False
@app.route('/')
def index():
    """Serve the single-page frontend (templates/index.html)."""
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    """Score a chatbot response against its question and supporting facts.

    Expects a JSON body with 'facts', 'question' and 'response' keys; a body
    of {"ping": true} is answered with a readiness status instead. Returns
    the sigmoid probability and a boolean 'is_best' verdict, 503 while the
    model artifacts are not yet available, or 500 on inference errors.
    """
    global model, tokenizer
    # Lazy-load the artifacts on first use (training may still be running).
    if model is None or tokenizer is None:
        if not load_resources():
            return jsonify({
                'error': 'Model is still training. Please wait a few minutes.',
                'status': 'training'
            }), 503

    # BUG FIX: request.json is None (or raises) for a missing/non-JSON body,
    # which previously crashed the .get() calls below with an unhandled
    # AttributeError outside the try block. get_json(silent=True) degrades
    # gracefully to an empty payload instead.
    data = request.get_json(silent=True) or {}
    if data.get('ping'):
        return jsonify({'status': 'ready'})

    facts = data.get('facts', 'No context provided.')
    question = data.get('question', '')
    response = data.get('response', '')

    try:
        # Preprocess text with facts context — same "[FACTS]/[QUERY]/[RES]"
        # template and lowercasing used at training time (train_model.py).
        text = f"[FACTS] {facts} [QUERY] {question} [RES] {response}".lower()
        seq = tokenizer.texts_to_sequences([text])
        pad = pad_sequences(seq, maxlen=MAX_LEN)

        # Prediction: single sigmoid output; > 0.5 counts as a "best" response.
        prediction = model.predict(pad)[0][0]
        is_best = bool(prediction > 0.5)

        return jsonify({
            'probability': float(prediction),
            'is_best': is_best
        })
    except Exception as e:
        # Surface inference failures to the client rather than a bare 500 page.
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Warm the model/tokenizer before serving so the first request is fast;
    # predict() still falls back to lazy loading if the artifacts are missing.
    load_resources()
    app.run(debug=True, port=5000)
best_distribution.png ADDED
chatbot_performance_advanced.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f766823513c756c3a9cec1327e3b3fffcd9058d12078c29e58dd4a91c20b00
3
+ size 24397072
chatbot_performance_lstm.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da77b22c6b79bbddb794324eaf616ce9e7ee7d8fb0be287909791cb8e76377a0
3
+ size 4220752
engine_distribution.png ADDED
explore_data.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
def explore_data(file_path):
    """Run a quick exploratory-analysis pass over the chatbot dataset.

    Prints shape, columns, missing-value counts and the 'best' label
    distribution, and writes three figures to the working directory:
    engine_distribution.png, best_distribution.png, p_correlation.png.

    Args:
        file_path: Path to the CSV dataset. Assumes 'engine', 'best' and
            'p1'..'p10' columns exist — TODO confirm against the schema.
    """
    df = pd.read_csv(file_path)
    print("Dataset Shape:", df.shape)
    print("\nColumns:", df.columns.tolist())

    # Check for missing values
    print("\nMissing Values:\n", df.isnull().sum())

    # Engine distribution
    plt.figure(figsize=(10, 6))
    sns.countplot(x='engine', data=df)
    plt.title('Distribution of Chatbot Engines')
    plt.savefig('engine_distribution.png')
    # BUG FIX: figures were never closed, so repeated calls accumulated open
    # matplotlib figures (memory leak + "too many open figures" warning).
    plt.close()

    # Performance distribution (Best/Worst)
    plt.figure(figsize=(10, 6))
    df['best'].value_counts().plot(kind='bar')
    plt.title('Distribution of "Best" Label')
    plt.savefig('best_distribution.png')
    plt.close()

    # p1-p10 correlation
    p_cols = [f'p{i}' for i in range(1, 11)]
    plt.figure(figsize=(12, 10))
    sns.heatmap(df[p_cols].astype(int).corr(), annot=True, cmap='coolwarm')
    plt.title('Correlation between Evaluation Parameters (p1-p10)')
    plt.savefig('p_correlation.png')
    plt.close()

    print("\nTarget Variable 'best' counts:")
    print(df['best'].value_counts())
if __name__ == "__main__":
    # Run the EDA against the core dataset shipped with the repo.
    explore_data('BP_MHS_V1.csv')
p_correlation.png ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ tensorflow
4
+ scikit-learn
5
+ matplotlib
6
+ seaborn
7
+ flask
8
+ flask-cors
static/css/style.css ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary-color: #6366f1;
3
+ --primary-hover: #4f46e5;
4
+ --bg-dark: #0f172a;
5
+ --glass-bg: rgba(255, 255, 255, 0.05);
6
+ --glass-border: rgba(255, 255, 255, 0.1);
7
+ --text-main: #f8fafc;
8
+ --text-muted: #94a3b8;
9
+ --accent: #f43f5e;
10
+ --success: #10b981;
11
+ }
12
+
13
+ * {
14
+ margin: 0;
15
+ padding: 0;
16
+ box-sizing: border-box;
17
+ font-family: 'Inter', system-ui, -apple-system, sans-serif;
18
+ }
19
+
20
+ body {
21
+ background-color: var(--bg-dark);
22
+ color: var(--text-main);
23
+ min-height: 100vh;
24
+ display: flex;
25
+ justify-content: center;
26
+ align-items: center;
27
+ background-image: radial-gradient(circle at 50% 50%, #1e293b 0%, #0f172a 100%);
28
+ }
29
+
30
+ .container {
31
+ width: 90%;
32
+ max-width: 800px;
33
+ padding: 2rem;
34
+ background: var(--glass-bg);
35
+ backdrop-filter: blur(12px);
36
+ border: 1px solid var(--glass-border);
37
+ border-radius: 24px;
38
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
39
+ animation: fadeIn 0.8s ease-out;
40
+ }
41
+
42
+ @keyframes fadeIn {
43
+ from {
44
+ opacity: 0;
45
+ transform: translateY(20px);
46
+ }
47
+
48
+ to {
49
+ opacity: 1;
50
+ transform: translateY(0);
51
+ }
52
+ }
53
+
54
+ h1 {
55
+ font-size: 2.5rem;
56
+ margin-bottom: 0.5rem;
57
+ background: linear-gradient(to right, #818cf8, #f472b6);
58
+ -webkit-background-clip: text;
59
+ -webkit-text-fill-color: transparent;
60
+ text-align: center;
61
+ }
62
+
63
+ p.subtitle {
64
+ color: var(--text-muted);
65
+ text-align: center;
66
+ margin-bottom: 2.5rem;
67
+ }
68
+
69
+ .form-group {
70
+ margin-bottom: 1.5rem;
71
+ }
72
+
73
+ label {
74
+ display: block;
75
+ margin-bottom: 0.5rem;
76
+ font-weight: 500;
77
+ color: var(--text-main);
78
+ }
79
+
80
+ textarea,
81
+ input {
82
+ width: 100%;
83
+ padding: 1rem;
84
+ background: rgba(0, 0, 0, 0.2);
85
+ border: 1px solid var(--glass-border);
86
+ border-radius: 12px;
87
+ color: white;
88
+ font-size: 1rem;
89
+ transition: all 0.3s ease;
90
+ }
91
+
92
+ textarea:focus,
93
+ input:focus {
94
+ outline: none;
95
+ border-color: var(--primary-color);
96
+ box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2);
97
+ }
98
+
99
+ button {
100
+ width: 100%;
101
+ padding: 1rem;
102
+ background: linear-gradient(135deg, var(--primary-color), var(--primary-hover));
103
+ border: none;
104
+ border-radius: 12px;
105
+ color: white;
106
+ font-weight: 600;
107
+ font-size: 1.1rem;
108
+ cursor: pointer;
109
+ transition: transform 0.2s, box-shadow 0.2s;
110
+ margin-top: 1rem;
111
+ }
112
+
113
+ button:hover {
114
+ transform: translateY(-2px);
115
+ box-shadow: 0 4px 15px rgba(99, 102, 241, 0.4);
116
+ }
117
+
118
+ button:active {
119
+ transform: translateY(0);
120
+ }
121
+
122
+ .result-container {
123
+ margin-top: 2rem;
124
+ padding: 1.5rem;
125
+ border-radius: 16px;
126
+ background: rgba(255, 255, 255, 0.03);
127
+ border: 1px dashed var(--glass-border);
128
+ display: none;
129
+ }
130
+
131
+ .result-header {
132
+ font-weight: 600;
133
+ margin-bottom: 0.5rem;
134
+ }
135
+
136
+ .score-badge {
137
+ display: inline-block;
138
+ padding: 0.5rem 1rem;
139
+ border-radius: 99px;
140
+ font-weight: 700;
141
+ margin-top: 0.5rem;
142
+ }
143
+
144
+ .score-good {
145
+ background: rgba(16, 185, 129, 0.2);
146
+ color: #10b981;
147
+ }
148
+
149
+ .score-bad {
150
+ background: rgba(244, 63, 94, 0.2);
151
+ color: #f43f5e;
152
+ }
153
+
154
+ .loader {
155
+ width: 24px;
156
+ height: 24px;
157
+ border: 3px solid #FFF;
158
+ border-bottom-color: transparent;
159
+ border-radius: 50%;
160
+ display: inline-block;
161
+ box-sizing: border-box;
162
+ animation: rotation 1s linear infinite;
163
+ display: none;
164
+ vertical-align: middle;
165
+ margin-right: 10px;
166
+ }
167
+
168
+ .tabs {
169
+ display: flex;
170
+ gap: 1rem;
171
+ margin-bottom: 2rem;
172
+ justify-content: center;
173
+ }
174
+
175
+ .tab {
176
+ padding: 0.75rem 1.5rem;
177
+ background: rgba(255, 255, 255, 0.05);
178
+ border-radius: 12px;
179
+ cursor: pointer;
180
+ transition: all 0.3s ease;
181
+ border: 1px solid var(--glass-border);
182
+ color: var(--text-muted);
183
+ }
184
+
185
+ .tab.active {
186
+ background: var(--primary-color);
187
+ color: white;
188
+ border-color: var(--primary-color);
189
+ }
190
+
191
+ .view {
192
+ display: none;
193
+ }
194
+
195
+ .view.active {
196
+ display: block;
197
+ }
198
+
199
+ .dashboard-grid {
200
+ display: grid;
201
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
202
+ gap: 1.5rem;
203
+ margin-top: 1rem;
204
+ }
205
+
206
+ .stat-card {
207
+ background: rgba(255, 255, 255, 0.03);
208
+ padding: 1.5rem;
209
+ border-radius: 16px;
210
+ border: 1px solid var(--glass-border);
211
+ text-align: center;
212
+ }
213
+
214
+ .stat-value {
215
+ font-size: 1.5rem;
216
+ font-weight: 700;
217
+ margin-top: 0.5rem;
218
+ color: var(--primary-color);
219
+ }
220
+
221
+ .engine-list {
222
+ margin-top: 2rem;
223
+ }
224
+
225
+ .engine-item {
226
+ display: flex;
227
+ justify-content: space-between;
228
+ padding: 1rem;
229
+ border-bottom: 1px solid var(--glass-border);
230
+ }
231
+
232
+ .engine-item:last-child {
233
+ border-bottom: none;
234
+ }
235
+
236
+ @keyframes rotation {
237
+ 0% {
238
+ transform: rotate(0deg);
239
+ }
240
+
241
+ 100% {
242
+ transform: rotate(360deg);
243
+ }
244
+ }
templates/index.html ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>AI Chatbot Performance Analyzer</title>
8
+ <link rel="stylesheet" href="/static/css/style.css">
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
10
+ </head>
11
+
12
+ <body>
13
+ <div class="container">
14
+ <h1>Advanced Chatbot Performance</h1>
15
+ <p class="subtitle">Multi-context Evaluation with Attention LSTM</p>
16
+
17
+ <div id="status-bar"
18
+ style="margin-bottom: 1rem; text-align: center; font-size: 0.85rem; color: #fbbf24; background: rgba(251, 191, 36, 0.1); padding: 0.5rem; border-radius: 8px; border: 1px solid rgba(251, 191, 36, 0.2);">
19
+ ⚠️ Model Initialization in Progress... (Epoch 1/2)
20
+ </div>
21
+
22
+ <div class="tabs">
23
+ <div class="tab active" onclick="switchTab('analyzer')">Analyzer</div>
24
+ <div class="tab" onclick="switchTab('dashboard')">Analytics Dashboard</div>
25
+ </div>
26
+
27
+ <!-- Analyzer View -->
28
+ <div id="analyzer-view" class="view active">
29
+ <div class="form-group">
30
+ <label for="facts">Related Context / Facts</label>
31
+ <textarea id="facts" rows="3" placeholder="Paste the knowledge base or facts here..."></textarea>
32
+ </div>
33
+
34
+ <div class="form-group">
35
+ <label for="question">User Question</label>
36
+ <textarea id="question" rows="2" placeholder="Enter the user question..."></textarea>
37
+ </div>
38
+
39
+ <div class="form-group">
40
+ <label for="response">Chatbot Response</label>
41
+ <textarea id="response" rows="3" placeholder="Enter the chatbot response..."></textarea>
42
+ </div>
43
+
44
+ <button id="analyze-btn">
45
+ <span class="loader" id="loader"></span>
46
+ Perform Deep Analysis
47
+ </button>
48
+
49
+ <div id="result" class="result-container">
50
+ <div class="result-header">Expert Verdict:</div>
51
+ <div id="result-text"></div>
52
+ <div id="score-badge" class="score-badge"></div>
53
+ <div id="probability" style="margin-top: 1rem; font-size: 0.9rem; color: var(--text-muted);"></div>
54
+ </div>
55
+ </div>
56
+
57
+ <!-- Dashboard View -->
58
+ <div id="dashboard-view" class="view">
59
+ <h2 style="margin-bottom: 1rem;">Dataset Insights</h2>
60
+ <div class="dashboard-grid">
61
+ <div class="stat-card">
62
+ <div>Total Queries</div>
63
+ <div class="stat-value" id="total-queries">40,152</div>
64
+ </div>
65
+ <div class="stat-card">
66
+ <div>Overall Accuracy</div>
67
+ <div class="stat-value" id="overall-quality">31.4%</div>
68
+ </div>
69
+ </div>
70
+
71
+ <div class="engine-list">
72
+ <h3 style="margin-bottom: 0.5rem;">Engine Performance Breakdown</h3>
73
+ <div class="engine-item">
74
+ <span>Openbook Performance</span>
75
+ <span style="color: var(--success)">67.3% Top Responses</span>
76
+ </div>
77
+ <div class="engine-item">
78
+ <span>Dialogflow Performance</span>
79
+ <span style="color: #6366f1">24.2% Top Responses</span>
80
+ </div>
81
+ <div class="engine-item">
82
+ <span>Watson Performance</span>
83
+ <span style="color: var(--accent)">19.3% Top Responses</span>
84
+ </div>
85
+ <div class="engine-item">
86
+ <span>Rasa Performance</span>
87
+ <span style="color: var(--accent)">14.6% Top Responses</span>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+
93
+ <script>
// Poll the backend readiness via a {ping: true} POST to /predict.
// NOTE: the original comment said "every 30 seconds" — the interval below
// actually fires every 15 seconds and stops once the banner is hidden.
async function checkStatus() {
    try {
        const res = await fetch('/predict', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ ping: true })
        });
        if (res.ok) {
            // Flip the warning banner to a green "ready" state, then hide it.
            const statusBar = document.getElementById('status-bar');
            statusBar.style.color = '#10b981';
            statusBar.style.background = 'rgba(16, 185, 129, 0.1)';
            statusBar.style.borderColor = 'rgba(16, 185, 129, 0.2)';
            statusBar.innerText = '✅ Advanced Intelligence Engine Active';
            setTimeout(() => statusBar.style.display = 'none', 5000);
        }
    } catch (e) { } // network errors are expected while the server warms up
}
checkStatus();
const statusInterval = setInterval(() => {
    const statusBar = document.getElementById('status-bar');
    if (statusBar && statusBar.style.display !== 'none') {
        checkStatus();
    } else {
        // Banner is gone (or missing) — no need to keep polling.
        clearInterval(statusInterval);
    }
}, 15000);
// Toggle between the Analyzer and Dashboard views; `tab` is either
// 'analyzer' or anything else (treated as the dashboard).
function switchTab(tab) {
    document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));

    const isAnalyzer = tab === 'analyzer';
    const tabSelector = isAnalyzer ? '.tab:nth-child(1)' : '.tab:nth-child(2)';
    const viewId = isAnalyzer ? 'analyzer-view' : 'dashboard-view';

    document.querySelector(tabSelector).classList.add('active');
    document.getElementById(viewId).classList.add('active');
}
// Submit the three text fields to /predict and render the verdict.
document.getElementById('analyze-btn').addEventListener('click', async () => {
    const facts = document.getElementById('facts').value;
    const question = document.getElementById('question').value;
    const response = document.getElementById('response').value;
    const loader = document.getElementById('loader');
    const resultDiv = document.getElementById('result');
    const resultText = document.getElementById('result-text');
    const scoreBadge = document.getElementById('score-badge');
    const probDiv = document.getElementById('probability');

    // Facts are optional; question and response are required.
    if (!question || !response) {
        alert('Please fill in the question and response.');
        return;
    }

    // Show the spinner and lock the button while the request is in flight.
    loader.style.display = 'inline-block';
    document.getElementById('analyze-btn').disabled = true;
    resultDiv.style.display = 'none';

    try {
        const res = await fetch('/predict', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ facts, question, response })
        });

        const data = await res.json();

        if (!res.ok) {
            // 503 = model still training (see app.py predict()); anything
            // else is surfaced as a generic analysis error.
            if (res.status === 503) {
                alert(data.error);
            } else {
                alert('Analysis Error: ' + (data.error || 'Server error'));
            }
            return;
        }

        // Render the verdict badge and the raw model probability.
        resultDiv.style.display = 'block';
        if (data.is_best) {
            resultText.innerText = "Advanced analysis confirms this is a high-fidelity response.";
            scoreBadge.innerText = "OPTIMIZED";
            scoreBadge.className = "score-badge score-good";
        } else {
            resultText.innerText = "Analysis suggests potential inaccuracies or linguistic flaws.";
            scoreBadge.innerText = "SUB-OPTIMAL";
            scoreBadge.className = "score-badge score-bad";
        }

        probDiv.innerText = `Attention Confidence: ${(data.probability * 100).toFixed(2)}%`;

    } catch (err) {
        alert('Analysis failed. Ensure server is running.');
    } finally {
        // Always restore the button/spinner, success or failure.
        loader.style.display = 'none';
        document.getElementById('analyze-btn').disabled = false;
    }
});
193
+ </script>
194
+ </body>
195
+
196
+ </html>
tokenizer.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09022b465e7accaf02270c3afdf51c47e1ba38dba03cc84570c6bed8de942736
3
+ size 131289
tokenizer_advanced.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76331bb23bfb4785ea911ae2c1243e3d97e41d79e00753b8e639b45d85bb0d92
3
+ size 131611
train_model.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# BUG FIX: the original file repeated this entire import section twice
# (lines 1-11 duplicated verbatim as lines 13-22). Merged into a single,
# grouped block; every distinct import is preserved.
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, SpatialDropout1D, Bidirectional, Dropout, Layer, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
# Custom Attention Layer
class Attention(Layer):
    """Additive attention pooling over a (batch, time, features) sequence.

    Scores every timestep with a learned projection, softmax-normalizes the
    scores along the time axis, and collapses the sequence into a single
    attention-weighted feature vector per example.
    """

    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature dimension, one bias per timestep.
        feature_dim = input_shape[-1]
        time_steps = input_shape[1]
        self.W = self.add_weight(name='attention_weight',
                                 shape=(feature_dim, 1),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='attention_bias',
                                 shape=(time_steps, 1),
                                 initializer='zeros',
                                 trainable=True)
        super(Attention, self).build(input_shape)

    def call(self, x):
        # tanh-squashed scores -> softmax over time -> weighted sum.
        scores = K.tanh(K.dot(x, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        return K.sum(x * weights, axis=1)
def train_advanced_model(file_path):
    """Train the Bi-LSTM + Attention response-quality classifier.

    Builds a combined "[FACTS] ... [QUERY] ... [RES] ..." text field from the
    CSV at *file_path*, fits and saves a tokenizer (tokenizer_advanced.pickle),
    trains with class weighting while checkpointing the best weights to
    chatbot_performance_advanced.h5, and finally reports metrics on the
    held-out test split.
    """
    print("Loading data for advanced model...")
    df = pd.read_csv(file_path)

    # Fill missing facts
    df['related_facts'] = df['related_facts'].fillna("No context provided.")

    # Advanced Preprocessing: Combine facts, question, and response
    # Structure: [FACTS] facts [SEP] [QUERY] question [SEP] [RES] response
    df['text'] = "[FACTS] " + df['related_facts'].astype(str) + \
                 " [QUERY] " + df['question'].astype(str) + \
                 " [RES] " + df['engine_response'].astype(str)

    y = df['best'].astype(int).values
    X_text = df['text'].astype(str).str.lower().values

    max_words = 15000
    max_len = 300  # must match MAX_LEN in app.py at inference time

    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(X_text)
    X_seq = tokenizer.texts_to_sequences(X_text)
    X_pad = pad_sequences(X_seq, maxlen=max_len)

    X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.15, random_state=42, stratify=y)

    # Save tokenizer immediately so it's available as soon as model starts saving checkpoints
    with open('tokenizer_advanced.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print("Tokenizer saved.")

    # Advanced Arch: Bi-LSTM + Attention
    inputs = Input(shape=(max_len,))
    embed = Embedding(max_words, 128)(inputs)
    drop1 = SpatialDropout1D(0.3)(embed)
    lstm = Bidirectional(LSTM(64, return_sequences=True))(drop1)
    attn = Attention()(lstm)
    dense1 = Dense(64, activation='relu')(attn)
    drop2 = Dropout(0.4)(dense1)
    outputs = Dense(1, activation='sigmoid')(drop2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())

    # Training with Checkpointing
    batch_size = 128
    epochs = 2
    # Up-weight the positive class to counter label imbalance.
    # BUG FIX: guard against a degenerate dataset with no positive labels,
    # which previously raised ZeroDivisionError.
    n_pos = len(y[y == 1])
    class_weight = {0: 1.0, 1: (len(y[y == 0]) / n_pos) if n_pos else 1.0}

    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        'chatbot_performance_advanced.h5',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )

    print("Training advanced model with Attention...")
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        class_weight=class_weight,
        callbacks=[checkpoint],
        verbose=1
    )

    print("Training complete.")

    # BUG FIX: the held-out test split was created but never used, even though
    # classification_report/confusion_matrix were imported for exactly this.
    # Evaluate the final in-memory weights here. NOTE(review): the checkpoint
    # on disk may hold an earlier, better epoch — reload it first if you need
    # the checkpointed model's metrics.
    y_pred = (model.predict(X_test).ravel() > 0.5).astype(int)
    print("\nTest-set classification report:")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:")
    print(confusion_matrix(y_test, y_pred))
if __name__ == "__main__":
    # Train on the core dataset shipped with the repo.
    train_advanced_model('BP_MHS_V1.csv')