Files changed (2) hide show
  1. app.py +239 -193
  2. auto_retrain.py +89 -0
app.py CHANGED
@@ -1,193 +1,239 @@
1
- from flask import Flask, request, jsonify
2
- import numpy as np
3
- import os
4
- import pickle
5
-
6
- # πŸ›‘ NO HEAVY IMPORTS HERE (Lazy Loading to prevent 500 Errors)
7
-
8
- app = Flask(__name__)
9
-
10
- # Global Cache
11
- model_cache = {
12
- "lucid": None,
13
- "mouse": None,
14
- "fusion": None,
15
- "loaded": False,
16
- "error": None,
17
- "logs": []
18
- }
19
-
20
- def load_heavy_brains():
21
- """Loads libraries and models safely."""
22
- # 1. If already loaded, return the logs from the first time
23
- if model_cache["loaded"]:
24
- return model_cache["logs"]
25
-
26
- log = []
27
- try:
28
- log.append("⏳ Importing TensorFlow...")
29
- import tensorflow as tf
30
- log.append("βœ… TensorFlow Imported")
31
-
32
- log.append("⏳ Importing XGBoost...")
33
- import xgboost as xgb
34
- log.append("βœ… XGBoost Imported")
35
-
36
- # Define Architecture
37
- Sequential = tf.keras.models.Sequential
38
- Input = tf.keras.layers.Input
39
- LSTM = tf.keras.layers.LSTM
40
- Dense = tf.keras.layers.Dense
41
- Dropout = tf.keras.layers.Dropout
42
- BatchNormalization = tf.keras.layers.BatchNormalization
43
- LeakyReLU = tf.keras.layers.LeakyReLU
44
-
45
- # Load LUCID
46
- if os.path.exists("lucid_cnn.h5"):
47
- model_cache["lucid"] = tf.keras.models.load_model("lucid_cnn.h5")
48
- log.append("βœ… LUCID Model Loaded")
49
- else:
50
- log.append("⚠️ lucid_cnn.h5 missing")
51
-
52
- # Load MOUSE
53
- if os.path.exists("delbot_rnn.h5"):
54
- mouse_model = Sequential([
55
- Input(shape=(None, 10)),
56
- LSTM(128, return_sequences=True, recurrent_dropout=0.0),
57
- BatchNormalization(),
58
- LeakyReLU(alpha=0.1),
59
- Dropout(0.3),
60
- LSTM(64, return_sequences=False, recurrent_dropout=0.0),
61
- LeakyReLU(alpha=0.1),
62
- Dropout(0.1),
63
- Dense(2, activation='softmax')
64
- ])
65
- mouse_model.load_weights("delbot_rnn.h5")
66
- model_cache["mouse"] = mouse_model
67
- log.append("βœ… Mouse Model Loaded")
68
- else:
69
- log.append("⚠️ delbot_rnn.h5 missing")
70
-
71
- # Load FUSION
72
- if os.path.exists("fusion_xgboost.pkl"):
73
- with open("fusion_xgboost.pkl", "rb") as f:
74
- model_cache["fusion"] = pickle.load(f)
75
- log.append("βœ… Fusion Model Loaded")
76
- else:
77
- log.append("⚠️ fusion_xgboost.pkl missing")
78
-
79
- model_cache["loaded"] = True
80
- model_cache["logs"] = log
81
- return log
82
-
83
- except Exception as e:
84
- err_msg = f"❌ CRITICAL LOAD ERROR: {str(e)}"
85
- print(err_msg)
86
- model_cache["error"] = err_msg
87
- return log + [err_msg]
88
-
89
- # --- HELPER: DATA TRANSLATOR ---
90
- def process_mouse_data(trace):
91
- try:
92
- import numpy as np
93
- MAX_STEPS = 60
94
- if not trace or len(trace) < 2: return None
95
-
96
- vectors = []
97
- for i in range(1, len(trace)):
98
- dt = (trace[i]['t'] - trace[i-1]['t']) or 1
99
- dx = trace[i]['x'] - trace[i-1]['x']
100
- dy = trace[i]['y'] - trace[i-1]['y']
101
- angle = np.arctan2(dy, dx)
102
- vectors.append([dx, dy, dt, dx/dt, dy/dt, angle, 0.0, 0.0, 0.0, 0.0])
103
-
104
- data = np.array(vectors)
105
- if len(data) > MAX_STEPS: data = data[:MAX_STEPS]
106
- elif len(data) < MAX_STEPS:
107
- data = np.vstack([data, np.zeros((MAX_STEPS - len(data), 10))])
108
- return np.expand_dims(data, axis=0)
109
- except: return None
110
-
111
- @app.route('/')
112
- def home():
113
- return "<h3>Bot Detection Server (JSON Fix Applied)</h3>Status: 🟒 Running"
114
-
115
- @app.route('/detect', methods=['POST'])
116
- def detect():
117
- # 1. Load Brains
118
- load_logs = load_heavy_brains()
119
- if load_logs is None: load_logs = [] # Safety fix for NoneType error
120
-
121
- if model_cache["error"]:
122
- return jsonify({
123
- "success": False,
124
- "error": "Model Load Failed",
125
- "details": model_cache["error"]
126
- })
127
-
128
- # 2. Predict
129
- try:
130
- data = request.json or {}
131
- botd = float(data.get('botd_score', 0.0))
132
- mouse_trace = data.get('mouse_trace', [])
133
- ts = data.get('request_timestamps', [])
134
-
135
- mouse_score = 0.5
136
- net_score = 0.0
137
-
138
- # Mouse
139
- if model_cache["mouse"]:
140
- inp = process_mouse_data(mouse_trace)
141
- if inp is not None:
142
- # FLOAT() CASTING HERE IS CRITICAL
143
- raw_score = model_cache["mouse"].predict(inp, verbose=0)[0][1]
144
- mouse_score = float(raw_score)
145
-
146
- # Net
147
- if model_cache["lucid"] and len(ts) > 2:
148
- import numpy as np
149
- iat = np.diff(sorted(ts))[:10] / 1000.0
150
- mat = np.zeros((1, 10, 11, 1))
151
- l = min(len(iat), 10)
152
- mat[0, :l, 0, 0] = iat[:l]
153
- # FLOAT() CASTING HERE
154
- raw_net = model_cache["lucid"].predict(mat, verbose=0)[0][0]
155
- net_score = float(raw_net)
156
-
157
- # Fusion
158
- final_prob = max(botd, mouse_score)
159
- if model_cache["fusion"]:
160
- # FLOAT() CASTING HERE
161
- raw_fusion = model_cache["fusion"].predict_proba([[botd, mouse_score, net_score]])[0][1]
162
- final_prob = float(raw_fusion)
163
-
164
- # Logic
165
- pct = float(final_prob * 100) # Force Python Float
166
-
167
- if pct > 85: decision, action, is_bot = "BOT", "BLOCK", True
168
- elif pct > 50: decision, action, is_bot = "SUSPICIOUS", "CAPTCHA", True
169
- else: decision, action, is_bot = "HUMAN", "ALLOW", False
170
-
171
- return jsonify({
172
- "success": True,
173
- "is_bot": is_bot,
174
- "action": action,
175
- "decision": decision,
176
- "confidence": round(pct, 2),
177
- "forensics": {
178
- "botd": round(botd, 2),
179
- "mouse": round(mouse_score, 2),
180
- "net": round(net_score, 2)
181
- },
182
- "internal_logs": load_logs
183
- })
184
-
185
- except Exception as e:
186
- return jsonify({
187
- "success": False,
188
- "error": f"Prediction Error: {str(e)}",
189
- "internal_logs": load_logs
190
- })
191
-
192
- if __name__ == '__main__':
193
- app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import numpy as np
3
+ import os
4
+ import pickle
5
+ import uuid
6
+ import json
7
+ from datetime import datetime
8
+
9
+ # 🛑 NO HEAVY IMPORTS HERE (lazy loading to prevent 500 errors)
10
+
11
+ app = Flask(__name__)
12
+
13
# Global cache: module-level state shared by load_heavy_brains() and detect().
model_cache = {
    # Keras model loaded from lucid_cnn.h5 (None until loaded / if file missing).
    "lucid": None,
    # Keras LSTM model whose weights come from delbot_rnn.h5 (None if missing).
    "mouse": None,
    # Object unpickled from fusion_xgboost.pkl (None if missing).
    "fusion": None,
    # Set to True once load_heavy_brains() has completed without raising.
    "loaded": False,
    # Message of the last load failure; detect() refuses to predict while set.
    "error": None,
    # Progress messages recorded by the successful load.
    "logs": []
}
22
+
23
+ # ------------------ LOGGING HELPERS ------------------
24
+
25
def log_prediction(req_id, payload, output):
    """Append one prediction record to ``predictions.log`` as a JSON line.

    Args:
        req_id: Identifier of the request the prediction belongs to.
        payload: The request body that was scored (must be JSON-serialisable).
        output: The response that was produced (must be JSON-serialisable).

    The record carries a naive UTC ISO-8601 timestamp under ``"time"``.
    """
    entry = dict(
        request_id=req_id,
        time=datetime.utcnow().isoformat(),
        input=payload,
        output=output,
    )
    with open("predictions.log", "a") as log_file:
        log_file.write(json.dumps(entry) + "\n")
34
+
35
def log_feedback(feedback):
    """Append one feedback record to ``feedback.log`` as a JSON line.

    A naive UTC ISO-8601 timestamp is stored under ``"time"`` (replacing any
    value the caller supplied for that key). The caller's dict is not
    modified; a shallow copy is written instead.

    Args:
        feedback: Mapping describing the feedback (JSON-serialisable).

    Raises:
        TypeError: If ``feedback`` is not a dict (e.g. the request had no
            JSON body and the caller passed ``None``).
    """
    if not isinstance(feedback, dict):
        raise TypeError(
            f"feedback must be a dict, got {type(feedback).__name__}"
        )

    # Copy so that stamping the time does not mutate the caller's object.
    record = dict(feedback)
    record["time"] = datetime.utcnow().isoformat()
    with open("feedback.log", "a") as f:
        f.write(json.dumps(record) + "\n")
39
+
40
+ # ------------------ MODEL LOADING ------------------
41
+
42
def load_heavy_brains():
    """Lazily import TensorFlow/XGBoost and load the models into ``model_cache``.

    Each model file is optional: a missing file is reported in the returned
    log and the corresponding cache slot stays ``None``.

    On success ``model_cache["loaded"]`` is set, the log is stored in
    ``model_cache["logs"]`` and any error left by an earlier failed attempt is
    cleared. On failure the message is stored in ``model_cache["error"]`` and
    ``"loaded"`` stays ``False``, so the next call tries again.

    NOTE(review): once ``"loaded"`` is set the files are never read again, so
    a ``fusion_xgboost.pkl`` rewritten later is not picked up until restart.

    Returns:
        list[str]: Progress messages for this attempt (ending with the error
        message if it failed), or ``[]`` when the models were already loaded.
    """
    if model_cache["loaded"]:
        return []

    log = []
    try:
        log.append("⏳ Importing TensorFlow...")
        import tensorflow as tf
        log.append("βœ… TensorFlow Imported")

        log.append("⏳ Importing XGBoost...")
        # Not referenced below; imported up front so a missing/broken install
        # is reported here rather than from inside pickle.load().
        import xgboost as xgb  # noqa: F401
        log.append("βœ… XGBoost Imported")

        Sequential = tf.keras.models.Sequential
        Input = tf.keras.layers.Input
        LSTM = tf.keras.layers.LSTM
        Dense = tf.keras.layers.Dense
        Dropout = tf.keras.layers.Dropout
        BatchNormalization = tf.keras.layers.BatchNormalization
        LeakyReLU = tf.keras.layers.LeakyReLU

        if os.path.exists("lucid_cnn.h5"):
            model_cache["lucid"] = tf.keras.models.load_model("lucid_cnn.h5")
            log.append("βœ… LUCID Model Loaded")
        else:
            log.append("⚠️ lucid_cnn.h5 missing")

        if os.path.exists("delbot_rnn.h5"):
            # The file is loaded with load_weights(), so the architecture is
            # rebuilt here first.
            mouse_model = Sequential([
                Input(shape=(None, 10)),
                LSTM(128, return_sequences=True),
                BatchNormalization(),
                LeakyReLU(alpha=0.1),
                Dropout(0.3),
                LSTM(64),
                LeakyReLU(alpha=0.1),
                Dropout(0.1),
                Dense(2, activation='softmax')
            ])
            mouse_model.load_weights("delbot_rnn.h5")
            model_cache["mouse"] = mouse_model
            log.append("βœ… Mouse Model Loaded")
        else:
            log.append("⚠️ delbot_rnn.h5 missing")

        if os.path.exists("fusion_xgboost.pkl"):
            # NOTE(security): pickle.load executes arbitrary code embedded in
            # the file; this path must only ever be writable by trusted code.
            with open("fusion_xgboost.pkl", "rb") as f:
                model_cache["fusion"] = pickle.load(f)
            log.append("βœ… Fusion Model Loaded")
        else:
            log.append("⚠️ fusion_xgboost.pkl missing")

        model_cache["loaded"] = True
        model_cache["logs"] = log
        # A previous attempt may have failed; without this reset callers that
        # check model_cache["error"] would keep failing after a good retry.
        model_cache["error"] = None
        return log

    except Exception as e:
        err = f"❌ CRITICAL LOAD ERROR: {str(e)}"
        model_cache["error"] = err
        return log + [err]
103
+
104
+ # ------------------ DATA PROCESSING ------------------
105
+
106
def process_mouse_data(trace, *, max_steps=60):
    """Convert a mouse trace into a fixed-length feature tensor.

    Each consecutive pair of points yields one 10-element row:
    ``[dx, dy, dt, dx/dt, dy/dt, angle, 0, 0, 0, 0]`` where ``angle`` is
    ``arctan2(dy, dx)`` and a zero ``dt`` is replaced by 1 to avoid division
    by zero. The rows are truncated or zero-padded to ``max_steps``.

    Args:
        trace: Sequence of points, each indexable by ``'x'``, ``'y'`` and
            ``'t'``.
        max_steps: Number of rows in the output (default 60).

    Returns:
        ``numpy.ndarray`` of shape ``(1, max_steps, 10)``, or ``None`` when
        the trace has fewer than two points or is malformed.
    """
    try:
        if not trace or len(trace) < 2:
            return None

        # Only the first `max_steps` steps are kept, so points beyond
        # index `max_steps` never need to be visited.
        last = min(len(trace), max_steps + 1)
        vectors = []
        for i in range(1, last):
            prev, cur = trace[i - 1], trace[i]
            dt = (cur['t'] - prev['t']) or 1
            dx = cur['x'] - prev['x']
            dy = cur['y'] - prev['y']
            angle = np.arctan2(dy, dx)
            vectors.append([dx, dy, dt, dx/dt, dy/dt, angle, 0, 0, 0, 0])

        data = np.zeros((max_steps, 10))
        if vectors:
            data[:len(vectors)] = vectors

        return np.expand_dims(data, axis=0)
    except Exception:
        # Best-effort: any malformed trace means "no mouse signal". Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        return None
129
+
130
+ # ------------------ ROUTES ------------------
131
+
132
@app.route("/")
def home():
    """Serve a small HTML status banner at the site root."""
    banner = "<h3>Bot Detection Server</h3>Status: 🟒 Running"
    return banner
135
+
136
@app.route("/detect", methods=["POST"])
def detect():
    """Score one request as BOT / SUSPICIOUS / HUMAN.

    Reads ``botd_score``, ``mouse_trace`` and ``request_timestamps`` from the
    JSON body (missing or ``null`` fields fall back to neutral defaults),
    runs whichever of the cached models are available and combines the three
    scores. When no fusion model is loaded the maximum score is used.

    The response always carries ``request_id`` so that clients can refer to
    it in ``/feedback``. Errors are reported as ``{"success": False, ...}``.
    """
    req_id = str(uuid.uuid4())
    load_logs = load_heavy_brains()

    if model_cache["error"]:
        return jsonify({
            "success": False,
            "request_id": req_id,
            "error": model_cache["error"],
        })

    try:
        data = request.json or {}
        if not isinstance(data, dict):
            raise ValueError("JSON body must be an object")

        # `or` also covers an explicit JSON null, which .get(key, default)
        # would pass through as None and crash float()/len() below.
        botd = float(data.get("botd_score") or 0.0)
        mouse_trace = data.get("mouse_trace") or []
        ts = data.get("request_timestamps") or []

        mouse_score = None
        net_score = 0.0
        net_available = False

        if model_cache["mouse"]:
            inp = process_mouse_data(mouse_trace)
            if inp is not None:
                mouse_score = float(
                    model_cache["mouse"].predict(inp, verbose=0)[0][1]
                )

        if model_cache["lucid"] and len(ts) > 2:
            # Inter-arrival times (ms -> s) of the first 10 gaps fill column 0
            # of a 10x11 single-channel matrix; the rest stays zero.
            iat = np.diff(sorted(ts))[:10] / 1000.0
            mat = np.zeros((1, 10, 11, 1))
            mat[0, :len(iat), 0, 0] = iat
            net_score = float(
                model_cache["lucid"].predict(mat, verbose=0)[0][0]
            )
            # Track availability explicitly: a model output of exactly 0.0
            # is still a real signal.
            net_available = True

        features = [
            botd,
            mouse_score if mouse_score is not None else 0.5,
            net_score
        ]

        final_prob = max(features)
        if model_cache["fusion"]:
            final_prob = float(
                model_cache["fusion"].predict_proba([features])[0][1]
            )

        pct = float(np.clip(final_prob, 0.0, 1.0) * 100)

        if pct > 85:
            decision, action, is_bot = "BOT", "BLOCK", True
        elif pct > 50:
            decision, action, is_bot = "SUSPICIOUS", "CAPTCHA", True
        else:
            decision, action, is_bot = "HUMAN", "ALLOW", False

        response = {
            "success": True,
            "request_id": req_id,
            "is_bot": is_bot,
            "action": action,
            "decision": decision,
            "confidence": round(pct, 2),
            "forensics": {
                "botd": round(botd, 2),
                "mouse": round(mouse_score, 2) if mouse_score is not None else None,
                "net": round(net_score, 2)
            },
            "signals": {
                "mouse_available": mouse_score is not None,
                "net_available": net_available
            },
            "internal_logs": load_logs
        }

        # Persisting the record is best-effort: a full disk or read-only
        # filesystem must not turn a valid verdict into an error response.
        try:
            log_prediction(req_id, data, response)
        except Exception:
            app.logger.exception("Failed to log prediction %s", req_id)

        return jsonify(response)

    except Exception as e:
        return jsonify({"success": False, "request_id": req_id, "error": str(e)})
213
+
214
@app.route("/feedback", methods=["POST"])
def feedback():
    """Record ground-truth feedback for an earlier ``/detect`` response.

    Expected JSON body:

    {
      "request_id": "...",
      "actual": "HUMAN" | "BOT",
      "source": "captcha" | "admin" | "auto"
    }

    Returns ``{"success": True}`` on success, or HTTP 400 with
    ``{"success": False, "error": ...}`` when the body is not a JSON object,
    ``request_id`` is missing, or ``actual`` is not one of the two labels.

    NOTE(security): nothing here authenticates the caller, and the stored
    labels are later used as training data; restrict access accordingly.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so every bad-input case gets the same JSON error shape.
    fb = request.get_json(silent=True)
    if not isinstance(fb, dict):
        return jsonify({"success": False, "error": "JSON object expected"}), 400

    rid = fb.get("request_id")
    if not isinstance(rid, str) or not rid:
        return jsonify({"success": False, "error": "request_id is required"}), 400

    if fb.get("actual") not in ("HUMAN", "BOT"):
        return jsonify({
            "success": False,
            "error": "actual must be \"HUMAN\" or \"BOT\"",
        }), 400

    log_feedback(fb)
    return jsonify({"success": True})
226
+
227
+ # ------------------ ENTRY ------------------
228
+
229
import threading
from auto_retrain import retrain_loop


def start_auto_retrain():
    """Run ``retrain_loop`` in a background daemon thread.

    The thread is a daemon so it never keeps the process alive on shutdown.
    Note that every process importing this module starts its own thread.
    """
    t = threading.Thread(target=retrain_loop, daemon=True)
    t.start()


# Must happen before app.run(): app.run() blocks until the server stops, so
# anything placed after it would never execute while the server is serving.
start_auto_retrain()

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
auto_retrain.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import json
3
+ import os
4
+ import pickle
5
+ from datetime import datetime, timedelta
6
+ import numpy as np
7
+ from xgboost import XGBClassifier
8
+
9
# ---------------- CONFIG ----------------
# Seconds retrain_loop() sleeps between retrain() attempts.
CHECK_INTERVAL = 24*60*60 # check once per day
# Minimum age (days) of the last training before should_retrain() allows another.
RETRAIN_DAYS = 30
# Minimum number of labelled samples retrain() requires before fitting.
MIN_SAMPLES = 100

# JSON-lines inputs read by retrain(): prediction records and feedback records.
PRED_FILE = "predictions.log"
FB_FILE = "feedback.log"
# Where the pickled, freshly fitted classifier is written.
MODEL_OUT = "fusion_xgboost.pkl"
# Holds the ISO-8601 timestamp of the last completed training.
LAST_TRAIN_FILE = "last_train.txt"
# --------------------------------------
19
+
20
def load_json_lines(path):
    """Read a JSON-lines file and return its records as a list.

    Blank lines and lines that are not valid JSON are skipped, so one
    damaged or half-written line does not discard the rest of the file.

    Args:
        path: Path of the file to read.

    Returns:
        list: One decoded object per valid line; ``[]`` if the file does not
        exist.
    """
    records = []
    try:
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    records.append(json.loads(line))
                except json.JSONDecodeError:
                    # Skip the damaged line and keep the rest.
                    continue
    except FileNotFoundError:
        return []
    return records
25
+
26
def should_retrain():
    """Return True when at least ``RETRAIN_DAYS`` have passed since the last training.

    The last training time is read from ``LAST_TRAIN_FILE`` as a naive UTC
    ISO-8601 timestamp. A missing, empty or unparsable file counts as
    "never trained", so a damaged marker cannot block retraining forever.
    """
    try:
        # `with` closes the handle; the bare open().read() left it open.
        with open(LAST_TRAIN_FILE) as f:
            last = f.read().strip()
    except FileNotFoundError:
        return True

    try:
        last_date = datetime.fromisoformat(last)
    except ValueError:
        return True

    return (datetime.utcnow() - last_date) >= timedelta(days=RETRAIN_DAYS)
32
+
33
def _build_training_set(preds, fbs):
    """Join prediction records with feedback labels into (features, labels)."""
    labels = {}
    for fb in fbs:
        if not isinstance(fb, dict):
            continue
        rid = fb.get("request_id")
        actual = fb.get("actual")
        # Only the two documented labels are trusted; anything else would
        # otherwise be silently counted as HUMAN.
        if isinstance(rid, str) and actual in ("BOT", "HUMAN"):
            labels[rid] = 1 if actual == "BOT" else 0  # later feedback wins

    X, y = [], []
    for p in preds:
        try:
            rid = p["request_id"]
            if rid not in labels:
                continue
            f = p["output"]["forensics"]
            row = [
                f["botd"],
                f["mouse"] if f["mouse"] is not None else 0.5,
                f["net"]
            ]
        except (KeyError, TypeError):
            # Malformed record: skip it rather than abort the whole run.
            continue
        X.append(row)
        y.append(labels[rid])
    return X, y


def retrain():
    """Refit the fusion classifier from logged predictions and feedback.

    Does nothing unless ``should_retrain()`` allows it, at least
    ``MIN_SAMPLES`` predictions have usable feedback, and both classes are
    present. On success the model is pickled to ``MODEL_OUT`` and the current
    UTC time is written to ``LAST_TRAIN_FILE``.
    """
    print("πŸ” Auto-retrain: checking conditions...")

    if not should_retrain():
        print("⏳ Period not reached")
        return

    preds = load_json_lines(PRED_FILE)
    fbs = load_json_lines(FB_FILE)
    X, y = _build_training_set(preds, fbs)

    if len(X) < MIN_SAMPLES:
        print(f"⚠️ Not enough feedback: {len(X)}/{MIN_SAMPLES}")
        return

    if len(set(y)) < 2:
        # A binary classifier fitted on one class is useless at best;
        # presumably the consumer indexes predict_proba(...)[0][1] — verify.
        print("⚠️ Feedback contains a single class; skipping retrain")
        return

    print(f"πŸš€ Retraining fusion model with {len(X)} samples")

    model = XGBClassifier(
        n_estimators=100,
        max_depth=3,
        learning_rate=0.1,
        eval_metric="logloss",
        use_label_encoder=False
    )

    model.fit(np.array(X), np.array(y))

    # Write to a temporary file and rename, so a reader never sees a
    # half-written pickle if the process dies mid-dump.
    tmp_path = MODEL_OUT + ".tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(model, f)
    os.replace(tmp_path, MODEL_OUT)

    with open(LAST_TRAIN_FILE, "w") as f:
        f.write(datetime.utcnow().isoformat())

    print("βœ… Auto-retraining completed")
82
+
83
def retrain_loop():
    """Call ``retrain()`` forever, pausing ``CHECK_INTERVAL`` seconds between runs.

    Any ``Exception`` raised by a run is printed and the loop continues, so
    one failed attempt does not stop future attempts. Never returns.
    """
    while True:
        try:
            retrain()
        except Exception as exc:
            print("❌ Retrain error:", exc)
        time.sleep(CHECK_INTERVAL)