Click **Add file**. Click **Create new file**. Name it **app.py**. Paste this.
```
import os, re, time, pickle, zipfile, shutil, urllib.request
from urllib.parse import urlparse
from datetime import datetime
from typing import Optional, List

import numpy as np
import Levenshtein
import torch
import torch.nn as nn
import torch.nn.functional as F

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import (
    BertTokenizer, BertForSequenceClassification,
    RobertaTokenizer, RobertaForSequenceClassification
)

# ── Setup ──────────────────────────────────────────────
app = FastAPI(title="AdaptiveShield API", version="1.0.0")
app.add_middleware(CORSMiddleware, allow_origins=["*"],
                   allow_credentials=True, allow_methods=["*"], allow_headers=["*"])

DEVICE = torch.device("cpu")
MAX_LEN = 128
MAX_URL_LEN = 200
NUM_FEATURES = 30

TOP_DOMAINS = ["google.com", "youtube.com", "facebook.com", "amazon.com",
               "wikipedia.org", "twitter.com", "instagram.com", "linkedin.com",
               "microsoft.com", "apple.com", "netflix.com", "paypal.com",
               "ebay.com", "reddit.com", "github.com", "stackoverflow.com",
               "dropbox.com", "spotify.com", "adobe.com", "yahoo.com"]

SUSPICIOUS_TLDS = [".xyz", ".tk", ".ml", ".ga", ".cf", ".pw", ".top",
                   ".ru", ".cn", ".info", ".biz", ".click", ".link"]

BRAND_KEYWORDS = ["paypal", "amazon", "google", "microsoft", "apple", "facebook",
                  "netflix", "bank", "secure", "login", "verify", "account",
                  "update", "confirm", "password", "credit", "debit", "wallet"]

URL_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~:/?#[]@!$&()*+,;=%"
char_to_idx = {c: i + 2 for i, c in enumerate(URL_CHARS)}
char_to_idx["<PAD>"] = 0
char_to_idx["<UNK>"] = 1
VOCAB_SIZE = len(char_to_idx)
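# Illustrative encoding example: "go.gl" maps to [8, 16, 64, 8, 13], then is
# right-padded with 0 (<PAD>) to MAX_URL_LEN; any character outside URL_CHARS
# maps to 1 (<UNK>).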

feedback_store = []
scan_history = []

# ── CNN Model ──────────────────────────────────────────
class PhishingCNN(nn.Module):
    def __init__(self, vocab_size=None, embed_dim=128, num_filters=128,
                 filter_sizes=(2, 3, 4, 5), num_classes=2, dropout=0.5):
        super().__init__()
        vs = vocab_size or VOCAB_SIZE
        self.embedding = nn.Embedding(vs, embed_dim, padding_idx=0)
        self.convs = nn.ModuleList([
            nn.Sequential(nn.Conv1d(embed_dim, num_filters, fs),
                          nn.BatchNorm1d(num_filters), nn.ReLU())
            for fs in filter_sizes
        ])
        total = num_filters * len(filter_sizes)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout), nn.Linear(total, 256), nn.ReLU(),
            nn.BatchNorm1d(256), nn.Dropout(dropout * 0.6), nn.Linear(256, num_classes)
        )

    def forward(self, x):
        emb = self.embedding(x).permute(0, 2, 1)  # (batch, embed_dim, seq_len)
        pooled = []
        for conv in self.convs:
            out = conv(emb)
            pooled.append(F.max_pool1d(out, out.size(2)).squeeze(2))  # global max-pool
        return self.classifier(torch.cat(pooled, dim=1))

# ── GNN Model ─────────────────────────────────────────
GNN_AVAILABLE = False
try:
    from torch_geometric.nn import SAGEConv, BatchNorm as GNNBatchNorm

    class PhishingGNN(nn.Module):
        def __init__(self, num_features, hidden_dim, num_classes, dropout=0.3):
            super().__init__()
            self.conv1 = SAGEConv(num_features, hidden_dim)
            self.conv2 = SAGEConv(hidden_dim, hidden_dim * 2)
            self.conv3 = SAGEConv(hidden_dim * 2, hidden_dim)
            self.bn1 = GNNBatchNorm(hidden_dim)
            self.bn2 = GNNBatchNorm(hidden_dim * 2)
            self.bn3 = GNNBatchNorm(hidden_dim)
            self.cls = nn.Sequential(
                nn.Linear(hidden_dim, 64), nn.ReLU(),
                nn.Dropout(dropout), nn.Linear(64, num_classes)
            )
            self.drop = dropout

        def forward(self, x, ei):
            x = F.dropout(F.relu(self.bn1(self.conv1(x, ei))), p=self.drop, training=self.training)
            x = F.dropout(F.relu(self.bn2(self.conv2(x, ei))), p=self.drop, training=self.training)
            x = F.dropout(F.relu(self.bn3(self.conv3(x, ei))), p=self.drop, training=self.training)
            return self.cls(x)

    GNN_AVAILABLE = True
except Exception as e:
    print(f"GNN not available: {e}")

# ── Feature Functions ──────────────────────────────────
def compute_entropy(text):
    if not text:
        return 0.0
    freq = [text.count(c) / len(text) for c in set(text)]
    return -sum(p * np.log2(p + 1e-10) for p in freq)

def min_typo_distance(domain):
    if not domain:
        return 10
    clean = domain.replace("www.", "")
    return min(Levenshtein.distance(clean, d) for d in TOP_DOMAINS)

def is_ip(domain):
    return bool(re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", domain))

def count_encoded(url):
    return len(re.findall(r"%[0-9a-fA-F]{2}", url))

def extract_domain_name(url):
    try:
        parsed = urlparse(url if url.startswith("http") else "http://" + url)
        parts = parsed.netloc.split(".")
        return ".".join(parts[-2:]) if len(parts) >= 2 else parsed.netloc
    except Exception:
        return url

def extract_features(url):
    """Build the 30-dimensional lexical feature vector (must match NUM_FEATURES)."""
    url = str(url)
    try:
        parsed = urlparse(url if url.startswith("http") else "http://" + url)
        domain, path, query = parsed.netloc, parsed.path, parsed.query
    except Exception:
        domain, path, query = url, "", ""
    td = min_typo_distance(domain)
    return np.array([
        len(url), len(domain), len(path), len(query),
        url.count("."), url.count("-"), url.count("/"),
        url.count("@"), url.count("?"), url.count("="),
        url.count("%"), sum(c.isdigit() for c in url),
        len(domain.split(".")) - 1 if domain else 0,
        1 if url.startswith("https") else 0,
        1 if is_ip(domain) else 0,
        1 if any(domain.endswith(t) for t in SUSPICIOUS_TLDS) else 0,
        1 if any(b in url.lower() for b in BRAND_KEYWORDS) else 0,
        compute_entropy(url),
        sum(c.isdigit() for c in url) / max(len(url), 1),
        len([p for p in path.split("/") if p]),
        1 if td == 1 else 0, 1 if td == 2 else 0, td,
        len(re.findall(r"[0-9]", domain)),
        1 if "xn--" in domain else 0,
        url.count("_"), count_encoded(url),
        1 if re.search(r"\d{1,3}-\d{1,3}-\d{1,3}-\d{1,3}", domain) else 0,
        len(domain.split(".")[-1]) if domain else 0,
        sum(c.isupper() for c in url) / max(len(url), 1)
    ], dtype=np.float32)

def get_risk_level(prob):
    if prob >= 0.70:
        return "HIGH"
    elif prob >= 0.40:
        return "MEDIUM"
    return "LOW"

def analyze_extra(url):
    domain = extract_domain_name(url)
    td = min_typo_distance(domain)
    dists = {d: Levenshtein.distance(domain.replace("www.", ""), d) for d in TOP_DOMAINS}
    closest = min(dists, key=dists.get)
    return {
        "typosquatting_detected": td <= 2,
        "typo_distance": int(td),
        "closest_legitimate": closest,
        "homograph_detected": "xn--" in domain,
        "ip_as_domain": is_ip(domain),
        "suspicious_tld": any(domain.endswith(t) for t in SUSPICIOUS_TLDS),
        "brand_impersonation": any(b in url.lower() for b in BRAND_KEYWORDS),
        "url_entropy": round(compute_entropy(url), 4),
        "uses_https": url.startswith("https"),
        "url_encoded_chars": count_encoded(url),
        "domain": domain
    }

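# Illustrative example: analyze_extra("http://paypa1.com/login") reports
# typo_distance 1 with closest_legitimate "paypal.com" (typosquatting flagged)
# and brand_impersonation True via the "login" keyword.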
# ── Model Setup ────────────────────────────────────────
models = {}

def download_from_drive(file_id, dest_path):
    if os.path.exists(dest_path):
        print(f"Already exists: {dest_path}")
        return True
    url = f"https://drive.google.com/uc?export=download&id={file_id}&confirm=t"
    print(f"Downloading to {dest_path}...")
    try:
        urllib.request.urlretrieve(url, dest_path)
        print(f"Downloaded: {dest_path}")
        return True
    except Exception as e:
        print(f"Failed: {e}")
        return False

def extract_transformer(zip_path, target_path):
    if os.path.exists(f"{target_path}/config.json"):
        print(f"Already extracted: {target_path}")
        return
    tmp = f"/tmp/ext_{os.path.basename(target_path)}"
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp)
    for root, dirs, files in os.walk(tmp):
        if "config.json" in files and "model.safetensors" in files:
            if os.path.exists(target_path):
                shutil.rmtree(target_path)
            shutil.copytree(root, target_path)
            print(f"Extracted: {target_path}")
            return

def extract_pt(zip_path, pt_path):
    if os.path.exists(pt_path):
        print(f"Already extracted: {pt_path}")
        return
    tmp = f"/tmp/ext_{os.path.basename(pt_path)}"
    os.makedirs(tmp, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(tmp)
    pt_name = os.path.basename(pt_path)
    for root, dirs, files in os.walk(tmp):
        if pt_name in files:
            shutil.copy(f"{root}/{pt_name}", pt_path)
            print(f"Extracted: {pt_path}")
            return

def setup_models():
    os.makedirs("./models/bert", exist_ok=True)
    os.makedirs("./models/roberta", exist_ok=True)

    ids = {
        "bert_model.zip": os.getenv("BERT_FILE_ID", ""),
        "roberta_model.zip": os.getenv("ROBERTA_FILE_ID", ""),
        "cnn_model.zip": os.getenv("CNN_FILE_ID", ""),
        "gnn_model.zip": os.getenv("GNN_FILE_ID", ""),
    }

    for fname, fid in ids.items():
        if fid:
            download_from_drive(fid, f"./models/{fname}")

    if os.path.exists("./models/bert_model.zip"):
        extract_transformer("./models/bert_model.zip", "./models/bert")
    if os.path.exists("./models/roberta_model.zip"):
        extract_transformer("./models/roberta_model.zip", "./models/roberta")
    if os.path.exists("./models/cnn_model.zip"):
        extract_pt("./models/cnn_model.zip", "./models/cnn_best.pt")
    if os.path.exists("./models/gnn_model.zip"):
        extract_pt("./models/gnn_model.zip", "./models/gnn_best.pt")

    print("Model setup complete.")

setup_models()

# ── Load Models ────────────────────────────────────────
print(f"Loading models on {DEVICE}...")

try:
    models["bert_tokenizer"] = BertTokenizer.from_pretrained("./models/bert")
    models["bert"] = BertForSequenceClassification.from_pretrained("./models/bert").to(DEVICE).eval()
    print("BERT loaded.")
except Exception as e:
    print(f"BERT failed: {e}")

try:
    models["roberta_tokenizer"] = RobertaTokenizer.from_pretrained("./models/roberta")
    models["roberta"] = RobertaForSequenceClassification.from_pretrained("./models/roberta").to(DEVICE).eval()
    print("RoBERTa loaded.")
except Exception as e:
    print(f"RoBERTa failed: {e}")

try:
    ckpt = torch.load("./models/cnn_best.pt", map_location=DEVICE, weights_only=False)
    cnn = PhishingCNN(vocab_size=ckpt.get("vocab_size", VOCAB_SIZE))
    cnn.load_state_dict(ckpt["model_state"])
    models["cnn"] = cnn.to(DEVICE).eval()
    models["char_to_idx"] = ckpt.get("char_to_idx", char_to_idx)
    print("CNN loaded.")
except Exception as e:
    print(f"CNN failed: {e}")

try:
    if GNN_AVAILABLE:
        ckpt = torch.load("./models/gnn_best.pt", map_location=DEVICE, weights_only=False)
        gnn = PhishingGNN(ckpt.get("num_features", NUM_FEATURES),
                          ckpt.get("hidden_dim", 128),
                          ckpt.get("num_classes", 2),
                          ckpt.get("dropout", 0.3))
        gnn.load_state_dict(ckpt["model_state"])
        models["gnn"] = gnn.to(DEVICE).eval()
        models["scaler"] = ckpt["scaler"]
        print("GNN loaded.")
except Exception as e:
    print(f"GNN failed: {e}")

try:
    if "scaler" not in models:
        with open("./models/scaler.pkl", "rb") as f:
            models["scaler"] = pickle.load(f)
except Exception:
    pass

try:
    with open("./models/fusion_model.pkl", "rb") as f:
        models["fusion"] = pickle.load(f)
    print("Fusion loaded.")
except Exception as e:
    print(f"Fusion failed: {e}")

loaded = [k for k in models if not k.endswith("tokenizer") and not k.endswith("_to_idx")]
print(f"Models ready: {loaded}")

# ── Prediction Functions ───────────────────────────────
def pb(url):
    """BERT phishing probability (falls back to 0.5 if the model is unavailable)."""
    if "bert" not in models:
        return 0.5
    try:
        enc = models["bert_tokenizer"](url, add_special_tokens=True, max_length=MAX_LEN,
                                       padding="max_length", truncation=True, return_tensors="pt")
        with torch.no_grad():
            return torch.softmax(models["bert"](
                input_ids=enc["input_ids"].to(DEVICE),
                attention_mask=enc["attention_mask"].to(DEVICE)
            ).logits, dim=1)[0][1].item()
    except Exception:
        return 0.5

def pr(url):
    """RoBERTa phishing probability."""
    if "roberta" not in models:
        return 0.5
    try:
        enc = models["roberta_tokenizer"](url, add_special_tokens=True, max_length=MAX_LEN,
                                          padding="max_length", truncation=True, return_tensors="pt")
        with torch.no_grad():
            return torch.softmax(models["roberta"](
                input_ids=enc["input_ids"].to(DEVICE),
                attention_mask=enc["attention_mask"].to(DEVICE)
            ).logits, dim=1)[0][1].item()
    except Exception:
        return 0.5

def pc(url):
    """Character-CNN phishing probability."""
    if "cnn" not in models:
        return 0.5
    try:
        cidx = models.get("char_to_idx", char_to_idx)
        enc = [cidx.get(c, 1) for c in str(url)[:MAX_URL_LEN]]  # 1 = <UNK>
        enc = enc + [0] * (MAX_URL_LEN - len(enc))              # pad with <PAD>
        with torch.no_grad():
            return torch.softmax(models["cnn"](
                torch.tensor([enc], dtype=torch.long).to(DEVICE)
            ), dim=1)[0][1].item()
    except Exception:
        return 0.5

def pg(url):
    """GNN phishing probability; the URL is scored as a one-node graph with a self-loop."""
    if "gnn" not in models or "scaler" not in models:
        return 0.5
    try:
        f = models["scaler"].transform(extract_features(url).reshape(1, -1))
        x = torch.tensor(f, dtype=torch.float).to(DEVICE)
        ei = torch.tensor([[0], [0]], dtype=torch.long).to(DEVICE)
        with torch.no_grad():
            return torch.softmax(models["gnn"](x, ei), dim=1)[0][1].item()
    except Exception:
        return 0.5

def pf(b, r, c, g):
    """Fusion score; averages the four model scores if no fusion model is loaded."""
    if "fusion" not in models:
        return float(np.mean([b, r, c, g]))
    try:
        return float(models["fusion"].predict_proba(np.array([[b, r, c, g]]))[0][1])
    except Exception:
        return float(np.mean([b, r, c, g]))

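# Illustrative fallback: with no fusion model loaded, pf(0.9, 0.8, 0.7, 0.6)
# returns the plain average of the four scores, 0.75.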
# ── Request Models ─────────────────────────────────────
class ScanRequest(BaseModel):
    url: str

class FeedbackRequest(BaseModel):
    url: str
    is_phishing: bool
    user_comment: Optional[str] = ""

class BulkScanRequest(BaseModel):
    urls: List[str]

# ── Endpoints ──────────────────────────────────────────
@app.get("/")
def root():
    loaded = [k for k in models if not k.endswith("tokenizer") and not k.endswith("_to_idx")]
    return {"message": "AdaptiveShield API", "status": "running",
            "models": loaded, "device": str(DEVICE)}

@app.get("/health")
def health():
    loaded = [k for k in models if not k.endswith("tokenizer") and not k.endswith("_to_idx")]
    return {"status": "healthy", "models_loaded": loaded,
            "timestamp": datetime.now().isoformat()}

@app.post("/scan")
def scan_url(request: ScanRequest):
    url = request.url.strip()
    if not url:
        raise HTTPException(status_code=400, detail="URL cannot be empty.")
    start = time.time()
    b, r, c, g = pb(url), pr(url), pc(url), pg(url)
    fp = pf(b, r, c, g)
    extra = analyze_extra(url)
    # Heuristic boosts layered on top of the fused model score
    boost = 0.0
    if extra["typosquatting_detected"] and extra["typo_distance"] == 1:
        boost += 0.10
    if extra["ip_as_domain"]:
        boost += 0.15
    if extra["homograph_detected"]:
        boost += 0.10
    if extra["suspicious_tld"] and extra["brand_impersonation"]:
        boost += 0.08
    final = min(1.0, fp + boost)
    result = {
        "url": url,
        "label": "PHISHING" if final >= 0.5 else "LEGITIMATE",
        "phishing_probability": round(final * 100, 2),
        "risk_level": get_risk_level(final),
        "model_scores": {
            "bert": round(b * 100, 2), "roberta": round(r * 100, 2),
            "cnn": round(c * 100, 2), "gnn": round(g * 100, 2),
            "fusion": round(fp * 100, 2), "final": round(final * 100, 2)
        },
        "extra_analysis": extra,
        "scan_time_ms": round((time.time() - start) * 1000, 2),
        "timestamp": datetime.now().isoformat()
    }
    scan_history.append(result)
    return result

@app.post("/scan/bulk")
def scan_bulk(request: BulkScanRequest):
    if len(request.urls) > 50:
        raise HTTPException(status_code=400, detail="Max 50 URLs.")
    results = []
    ph = 0
    for url in request.urls:
        try:
            res = scan_url(ScanRequest(url=url))
            results.append(res)
            if res.get("label") == "PHISHING":
                ph += 1
        except Exception as e:
            results.append({"url": url, "error": str(e)})
    return {"total_scanned": len(results), "phishing_found": ph,
            "legitimate_found": len(results) - ph, "results": results}

@app.post("/feedback")
def feedback(request: FeedbackRequest):
    feedback_store.append({"url": request.url, "is_phishing": request.is_phishing,
                           "comment": request.user_comment,
                           "timestamp": datetime.now().isoformat()})
    return {"message": "Feedback received.", "total_feedback": len(feedback_store)}

@app.get("/history")
def history(limit: int = 20):
    return {"total_scans": len(scan_history), "results": scan_history[-limit:]}

@app.get("/stats")
def stats():
    if not scan_history:
        return {"message": "No scans yet."}
    total = len(scan_history)
    ph = sum(1 for s in scan_history if s.get("label") == "PHISHING")
    return {"total_scans": total, "phishing_detected": ph,
            "legitimate_detected": total - ph,
            "phishing_rate_percent": round(ph / total * 100, 2),
            "average_scan_time_ms": round(float(np.mean([s.get("scan_time_ms", 0) for s in scan_history])), 2)}
```
Click **Commit changes to main**.

---

## STEP 5 : Create requirements.txt

Click **Add file**. Click **Create new file**. Name it **requirements.txt**. Paste this.
```
fastapi==0.111.0
uvicorn==0.30.1
torch==2.1.0
transformers==4.44.0
tokenizers==0.19.1
torch_geometric
scikit-learn>=1.3.0
numpy>=1.24.0
python-Levenshtein==0.25.1
pydantic>=2.0.0
python-multipart==0.0.9
```
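
Once both files are committed and the Space finishes building, you can exercise the API over HTTP. A minimal sketch, assuming the client has the `requests` package installed and using a hypothetical Space URL (substitute your own):

```
import requests

BASE = "https://YOUR-USERNAME-adaptiveshield.hf.space"  # hypothetical Space URL

# Confirm the service is up and see which models loaded
print(requests.get(f"{BASE}/health").json())

# Score one URL; the response includes per-model scores and heuristic analysis
res = requests.post(f"{BASE}/scan", json={"url": "http://paypa1-secure.xyz/login"}).json()
print(res["label"], res["phishing_probability"], res["risk_level"])
```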