jerrynnms commited on
Commit
f3dfb38
·
verified ·
1 Parent(s): 95a7116

Upload 12 files

Browse files
Files changed (12) hide show
  1. AI_Model_architecture.py +212 -0
  2. Dockerfile +9 -0
  3. LICENSE +21 -0
  4. README.md +2 -12
  5. app.py +123 -0
  6. bert_explainer.py +67 -0
  7. index.html +46 -0
  8. requirements.txt +9 -0
  9. script.js +107 -0
  10. style.css +184 -0
  11. test_firebase.py +34 -0
  12. test_model_load.py +10 -0
AI_Model_architecture.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """流程圖
2
+ 讀取資料 → 分割資料 → 編碼 → 建立 Dataset / DataLoader
3
+
4
+ 建立模型(BERT+LSTM+CNN)
5
+
6
+ BERT 輸出 [batch, seq_len, 768]
7
+
8
+ BiLSTM [batch, seq_len, hidden_dim*2]
9
+
10
+ CNN 模組 (Conv1D + Dropout + GlobalMaxPooling1D)
11
+
12
+ Linear 分類器(輸出詐騙機率)
13
+
14
+ 訓練模型(Epochs)
15
+
16
+ 評估模型(Accuracy / F1 / Precision / Recall)
17
+
18
+ 儲存模型(.pth)
19
+
20
+ """#引入重要套件Import Library
21
+ import torch # PyTorch 主模組
22
+ import torch.nn as nn # 神經網路相關的層(例如 LSTM、Linear)
23
+ import torch.nn.functional as F # 提供純函式版的操作方法,像是 F.relu()、F.cross_entropy(),通常不帶參數、不自動建立權重
24
+ import numpy as np
25
+ import pandas as pd
26
+ import os
27
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:16"#讓 CUDA 使用「更小記憶體分配塊」的方法,能有效減少 OOM 錯誤。
28
+ import re
29
+
30
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
31
+ from tqdm import tqdm
32
+ from sklearn.model_selection import train_test_split
33
+ from torch.utils.data import DataLoader, Dataset # 提供 Dataset、DataLoader 類別
34
+ from transformers import BertTokenizer
35
+ from sklearn.model_selection import train_test_split
36
+ from transformers import BertModel
37
+ #BertTokenizer 把文字句子轉換成 BERT 格式的 token ID,例如 [CLS] 今天 天氣 不錯 [SEP] → [101, 1234, 5678, ...]
38
+ ##BertForSequenceClassification 是 Hugging Face 提供的一個完整 BERT 模型,接了分類用的 Linear 層,讓你直接拿來做分類任務(例如詐騙 vs 正常)
39
+
40
+
41
# Add CSV files that contain normal (non-scam) messages here.
normal_files = [r"C:\Users\user\Desktop\專案程式0527\Project_PredictScamInfo\data\NorANDScamInfo_data1.csv"]

# Add CSV files that contain scam messages here.
# NOTE(review): this list names the same CSV as normal_files — confirm that
# a single combined file (with its own "label" column) is what is intended.
scam_files = [
    r"C:\Users\user\Desktop\專案程式0527\Project_PredictScamInfo\data\NorANDScamInfo_data1.csv"]
47
+
48
+ #資料前處理
49
+ class BertPreprocessor:
50
+ def __init__(self, tokenizer_name="ckiplab/bert-base-chinese", max_len=128):
51
+ self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
52
+ self.max_len = max_len
53
+
54
+ def load_and_clean(self, filepath):
55
+ #載入 CSV 並清理 message 欄位。
56
+ df = pd.read_csv(filepath)
57
+ df = df.dropna().drop_duplicates().reset_index(drop=True)
58
+ # 文字清理:移除空白、保留中文英數與標點
59
+ df["message"] = df["message"].astype(str)
60
+ df["message"] = df["message"].apply(lambda text: re.sub(r"\s+", "", text))
61
+ df["message"] = df["message"].apply(lambda text: re.sub(r"[^\u4e00-\u9fffA-Za-z0-9。,!?]", "", text))
62
+ return df[["message", "label"]] # 保留必要欄位
63
+
64
+ def encode(self, messages):
65
+ #使用 HuggingFace BERT Tokenizer 將訊息編碼成模型輸入格式。
66
+ return self.tokenizer(
67
+ list(messages),
68
+ return_tensors="pt",
69
+ truncation=True,
70
+ padding="max_length",
71
+ max_length=self.max_len
72
+ )
73
+ #自動做資料前處理
74
def build_bert_inputs(normal_files, scam_files):
    """Load, clean, split and tokenize the message CSVs.

    Labels are taken from each CSV's own ``label`` column; this function does
    not assign a label based on which list a file came from.

    Args:
        normal_files: Paths of CSV files with normal messages.
        scam_files: Paths of CSV files with scam messages.

    Returns:
        ``(train_inputs, train_labels, val_inputs, val_labels, processor)``
        where the inputs are tokenizer outputs, the labels are pandas Series
        and ``processor`` is the ``BertPreprocessor`` that produced them.

    Raises:
        ValueError: If both file lists are empty.
    """
    processor = BertPreprocessor()

    # A path may appear in both lists. Loading it twice would duplicate every
    # row, and the duplicates would then be split across train and validation
    # (data leakage). dict.fromkeys de-duplicates while preserving order.
    all_files = list(dict.fromkeys(list(normal_files) + list(scam_files)))
    if not all_files:
        raise ValueError("build_bert_inputs needs at least one CSV file")

    frames = [processor.load_and_clean(filepath) for filepath in all_files]

    # ignore_index renumbers the rows, since dropna/drop_duplicates/concat do
    # not keep a contiguous index on their own.
    all_df = pd.concat(frames, ignore_index=True)
    # Cleaning can make different raw rows identical, and different files can
    # overlap, so drop duplicates again before splitting.
    all_df = all_df.drop_duplicates(subset=["message", "label"]).reset_index(drop=True)

    # Stratified 80/20 train/validation split with a fixed seed.
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        all_df["message"], all_df["label"],
        stratify=all_df["label"],
        test_size=0.2,
        random_state=25,
        shuffle=True
    )

    # Tokenize both splits with the same preprocessor.
    train_inputs = processor.encode(train_texts)
    val_inputs = processor.encode(val_texts)

    return train_inputs, train_labels, val_inputs, val_labels, processor
103
+
104
# Shallow copies of the two file lists (not referenced elsewhere in the
# visible code).
normal_files_labels = list(normal_files)
scam_files_labels = list(scam_files)
107
+
108
+ #print(bert_inputs.keys())
109
+
110
+ #定義 PyTorch Dataset 類別
111
class ScamDataset(Dataset):
    """Map-style dataset pairing tokenizer outputs with float labels.

    Args:
        inputs: Mapping with ``input_ids``, ``attention_mask`` and
            ``token_type_ids`` entries, each indexable by sample position.
        labels: Per-sample labels as a pandas Series, NumPy array, list or
            any other array-like. They are stored as ``float32``, the dtype
            ``nn.BCELoss`` expects for its targets.

    Raises:
        ValueError: If the number of labels differs from the number of
            encoded samples.
    """

    def __init__(self, inputs, labels):
        self.input_ids = inputs["input_ids"]            # token ids of each sentence
        self.attention_mask = inputs["attention_mask"]  # attention mask (0 = padding)
        self.token_type_ids = inputs["token_type_ids"]  # segment ids
        # np.asarray accepts a Series (ignoring its index) as well as plain
        # sequences, so callers are not forced to pass pandas objects.
        self.labels = torch.tensor(np.asarray(labels), dtype=torch.float32)
        if len(self.labels) != len(self.input_ids):
            raise ValueError(
                f"got {len(self.labels)} labels for {len(self.input_ids)} encoded samples"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the tensors of sample *idx* as a dict (one DataLoader item)."""
        return {
            "input_ids": self.input_ids[idx],
            "attention_mask": self.attention_mask[idx],
            "token_type_ids": self.token_type_ids[idx],
            "labels": self.labels[idx],
        }
128
+
129
# Build the datasets only when this file is run as the training script.
# Other modules import this file just for the class definitions; reading the
# CSVs at import time would make those imports fail on any machine that does
# not have the hard-coded data path.
if __name__ == "__main__":
    # One call handles both normal and scam files (cleaning + tokenizing).
    train_inputs, train_labels, val_inputs, val_labels, processor = build_bert_inputs(normal_files, scam_files)

    train_dataset = ScamDataset(train_inputs, train_labels)
    val_dataset = ScamDataset(val_inputs, val_labels)

    # Reshuffle the training batches every epoch; validation order stays fixed.
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8)
137
+
138
+ #模型
139
class BertLSTM_CNN_Classifier(nn.Module):
    """BERT encoder followed by a BiLSTM, a 1-D convolution and a linear head.

    Args:
        hidden_dim: Hidden size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied after the convolution.

    ``forward`` returns a 1-D tensor with one sigmoid probability per input
    sequence.
    """

    def __init__(self, hidden_dim=128, num_layers=1, dropout=0.3):
        super().__init__()
        # Pretrained Chinese BERT (ckiplab).
        self.bert = BertModel.from_pretrained("ckiplab/bert-base-chinese")
        # Bidirectional LSTM over BERT's 768-dimensional per-token outputs.
        self.LSTM = nn.LSTM(
            input_size=768,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # The convolution runs over the LSTM output (two directions, hence *2).
        self.conv1 = nn.Conv1d(
            in_channels=hidden_dim * 2,
            out_channels=128,
            kernel_size=3,
            padding=1,
        )
        self.dropout = nn.Dropout(dropout)
        # NOTE: despite the attribute name, this is global *average* pooling
        # over the sequence axis, not max pooling. The attribute name is kept
        # as-is because other code may refer to it.
        self.global_maxpool = nn.AdaptiveAvgPool1d(1)

        self.classifier = nn.Linear(128, 1)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return the scam probability of every sequence in the batch."""
        encoded = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        token_states = encoded.last_hidden_state         # [batch, seq_len, 768]

        sequence, _ = self.LSTM(token_states)            # [batch, seq_len, hidden_dim*2]
        channels_first = sequence.transpose(1, 2)        # [batch, hidden_dim*2, seq_len]

        features = self.dropout(self.conv1(channels_first))  # [batch, 128, seq_len]
        pooled = self.global_maxpool(features).squeeze(2)     # [batch, 128]

        logits = self.classifier(pooled)
        # Flatten [batch, 1] to [batch].
        return torch.sigmoid(logits).view(-1)
174
+
175
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training-only state. Building it at import time would instantiate a full
# BERT model and an optimizer in every process that merely imports the class
# definitions from this file.
if __name__ == "__main__":
    # Upper bound on intra-op CPU threads; tune to the machine's physical cores.
    torch.set_num_threads(8)
    # Model to train.
    model = BertLSTM_CNN_Classifier().to(device)
    # Optimizer and loss. BCELoss expects probabilities, which forward() returns.
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    criterion = nn.BCELoss()
184
+
185
+ # 訓練迴圈
186
+
187
if __name__ == "__main__":
    # Train only when no checkpoint exists yet; otherwise just load it.
    if not os.path.exists("model.pth"):
        print("🚀 未找到 model.pth,開始訓練模型...")
        num_epochs = 10
        for epoch in range(num_epochs):
            model.train()
            total_loss = 0.0
            for batch in train_loader:
                optimizer.zero_grad()
                # Move every tensor of the batch to the training device.
                on_device = {key: tensor.to(device) for key, tensor in batch.items()}

                outputs = model(
                    on_device["input_ids"],
                    on_device["attention_mask"],
                    on_device["token_type_ids"],
                )
                loss = criterion(outputs, on_device["labels"])
                loss.backward()
                optimizer.step()
                # The reported figure is the sum of batch losses for the epoch.
                total_loss += loss.item()
            print(f"[Epoch{epoch+1}]Training Loss:{total_loss:.4f}")
        # Persist the trained weights.
        torch.save(model.state_dict(), "model.pth")
        print("✅ 模型訓練完成並儲存為 model.pth")
    else:
        print("✅ 已找到 model.pth,載入模型跳過訓練")
        model.load_state_dict(torch.load("model.pth", map_location=device))
212
+
Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10

WORKDIR /app

# Install dependencies before copying the source so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 jerrynnm
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,2 @@
1
- ---
2
- title: Scam Detector
3
- emoji: 👁
4
- colorFrom: gray
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- short_description: Scam Detection API using FastAPI & PyTorch
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # ScamChecker
2
+ A web app to detect scams using a machine learning model.
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from datetime import datetime
5
+ from typing import Optional, List
6
+ from bert_explainer import analyze_text as bert_analyze_text
7
+ from firebase_admin import credentials, firestore
8
+ import firebase_admin
9
+ import pytz
10
+ import os
11
+ import json
12
+ import requests
13
+ import torch
14
+
15
+ app = FastAPI(
16
+ title="詐騙訊息辨識 API",
17
+ description="使用 BERT 模型分析輸入文字是否為詐騙內容",
18
+ version="1.0.0"
19
+ )
20
+
21
+ app.add_middleware(
22
+ CORSMiddleware,
23
+ allow_origins=["*"],
24
+ allow_credentials=True,
25
+ allow_methods=["*"],
26
+ allow_headers=["*"],
27
+ )
28
+
29
class TextAnalysisRequest(BaseModel):
    """Request body of POST /predict."""

    # Message text to classify.
    text: str
    # Optional caller identifier; stored with the analysis record.
    user_id: Optional[str] = None
32
+
33
class TextAnalysisResponse(BaseModel):
    """Response body of POST /predict."""

    # Predicted label string returned by bert_explainer.analyze_text.
    status: str
    # Model probability expressed as a percentage (0-100, two decimals).
    confidence: float
    # NOTE(review): analyze_text currently puts a single risk-level string in
    # this list rather than extracted keywords — confirm the intended content.
    suspicious_keywords: List[str]
    # Time of the analysis (the handler uses the Asia/Taipei time zone).
    analysis_timestamp: datetime
    # Identifier of the Firestore document holding the analysis record.
    text_id: str
39
+
40
# Initialise Firebase from the FIREBASE_CREDENTIALS environment variable.
# db is bound to None first so the name always exists: if initialisation
# fails, later code can test for None instead of hitting a NameError.
db = None
try:
    cred_data = os.getenv("FIREBASE_CREDENTIALS")
    if not cred_data:
        raise ValueError("FIREBASE_CREDENTIALS 環境變數未設置")
    # The variable holds the service-account JSON; "type" is supplied as a
    # default and is overridden when the JSON already contains it.
    cred = credentials.Certificate({"type": "service_account", **json.loads(cred_data)})
    firebase_admin.initialize_app(cred)
    db = firestore.client()
except Exception as e:
    # Deliberately best-effort: the API still starts without Firestore.
    print(f"Firebase 初始化錯誤: {e}")
50
+
51
+ # 從 Google Drive 載入 model.pth
52
def load_model_from_drive():
    """Download ``model.pth`` from Google Drive into the working directory.

    Returns:
        bool: True when the checkpoint was downloaded and saved, False when
        the request failed or the response was not the file itself.
    """
    model_url = "https://drive.google.com/uc?export=download&id=1UXkOqMPUiPUIbsy8iENHUqbNFLEHcFFg"  # replace with your own file ID
    tmp_path = "model.pth.part"
    try:
        # stream=True avoids holding the whole checkpoint in memory, and the
        # (connect, read) timeout keeps start-up from hanging indefinitely.
        with requests.get(model_url, stream=True, timeout=(10, 300)) as response:
            if response.status_code != 200:
                return False
            # Drive can answer 200 with an HTML page (for example a download
            # confirmation) instead of the file; saving that page would leave
            # a corrupt model.pth that later fails inside torch.load.
            if "text/html" in response.headers.get("Content-Type", ""):
                return False
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
    except requests.RequestException:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        return False
    # Rename only after a complete download so a partial file is never
    # mistaken for a usable checkpoint.
    os.replace(tmp_path, "model.pth")
    return True
60
+
61
# Fetch the checkpoint when it is not already on disk.
if not os.path.exists("model.pth"):
    if not load_model_from_drive():
        raise FileNotFoundError("無法從 Google Drive 載入 model.pth")

# NOTE(review): the /predict handler calls bert_explainer.analyze_text, which
# uses the model loaded inside bert_explainer. This second copy does not
# appear to be used in the visible code — confirm before removing it.
from AI_Model_architecture import BertLSTM_CNN_Classifier
model = BertLSTM_CNN_Classifier()
model.load_state_dict(torch.load("model.pth", map_location="cpu"))
model.eval()
69
+
70
@app.get("/")
async def root():
    """Return a small status payload confirming that the API is running."""
    payload = {
        "message": "詐騙文字辨識 API 已啟動",
        "version": "1.0.0",
        "status": "active",
        "docs": "/docs",
    }
    return payload
73
+
74
@app.post("/predict", response_model=TextAnalysisResponse)
async def analyze_text_api(request: TextAnalysisRequest):
    """Classify the submitted text, store the result in Firestore and return it.

    Raises:
        HTTPException: 503 when Firestore is unavailable; 500 for any other
            failure during analysis or storage.
    """
    try:
        # Firebase initialisation at import time is best-effort, so the client
        # may be missing; report that clearly instead of failing obscurely.
        store = globals().get("db")
        if store is None:
            raise HTTPException(status_code=503, detail="Firestore is not available")

        tz = pytz.timezone("Asia/Taipei")
        taiwan_now = datetime.now(tz)
        collection_name = taiwan_now.strftime("%Y%m%d")
        # Microseconds are included so two requests arriving within the same
        # second do not share an id and overwrite each other's record.
        document_id = taiwan_now.strftime("%Y%m%dT%H%M%S%f")
        timestamp_str = taiwan_now.strftime("%Y-%m-%d %H:%M:%S")

        # NOTE(review): this is a synchronous model call inside an async
        # handler, so it blocks the event loop while it runs.
        result = bert_analyze_text(request.text)

        record = {
            "text_id": document_id,
            "text": request.text,
            "user_id": request.user_id,
            "analysis_result": {
                "status": result["status"],
                "confidence": result["confidence"],
                "suspicious_keywords": result["suspicious_keywords"],
            },
            "timestamp": timestamp_str,
            "type": "text_analysis"
        }

        # One collection per day, one document per request.
        store.collection(collection_name).document(document_id).set(record)

        return TextAnalysisResponse(
            status=result["status"],
            confidence=result["confidence"],
            suspicious_keywords=result["suspicious_keywords"],
            analysis_timestamp=taiwan_now,
            text_id=document_id
        )
    except HTTPException:
        # Keep deliberate status codes instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
109
+
110
@app.post("/feedback")
async def save_user_feedback(feedback: dict):
    """Store a user's feedback on a prediction in the ``user_feedback`` collection.

    The submitted fields are saved together with a Taipei-time timestamp and
    ``used_in_training`` set to False.

    Raises:
        HTTPException: 503 when Firestore is unavailable; 500 for any other
            failure while saving.
    """
    try:
        # Firebase initialisation at import time is best-effort, so the client
        # may be missing; report that clearly instead of failing obscurely.
        store = globals().get("db")
        if store is None:
            raise HTTPException(status_code=503, detail="Firestore is not available")

        tz = pytz.timezone("Asia/Taipei")
        taiwan_now = datetime.now(tz)
        timestamp_str = taiwan_now.strftime("%Y-%m-%d %H:%M:%S")

        # Build a new record rather than mutating the parsed request body.
        record = {**feedback, "used_in_training": False, "timestamp": timestamp_str}

        store.collection("user_feedback").add(record)
        return {"message": "✅ 已記錄使用者回饋"}
    except HTTPException:
        # Keep deliberate status codes instead of rewrapping them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
bert_explainer.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from AI_Model_architecture import BertLSTM_CNN_Classifier, BertPreprocessor
3
+ from transformers import BertTokenizer
4
+ import re
5
+ import requests
6
+ import os
7
+
8
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
+ # 從 Google Drive 載入 model.pth
11
def load_model_from_drive():
    """Download ``model.pth`` from Google Drive into the working directory.

    Returns:
        bool: True when the checkpoint was downloaded and saved, False when
        the request failed or the response was not the file itself.
    """
    model_url = "https://drive.google.com/uc?export=download&id=1UXkOqMPUiPUIbsy8iENHUqbNFLEHcFFg"  # replace with your own file ID
    tmp_path = "model.pth.part"
    try:
        # stream=True avoids holding the whole checkpoint in memory, and the
        # (connect, read) timeout keeps start-up from hanging indefinitely.
        with requests.get(model_url, stream=True, timeout=(10, 300)) as response:
            if response.status_code != 200:
                return False
            # Drive can answer 200 with an HTML page (for example a download
            # confirmation) instead of the file; saving that page would leave
            # a corrupt model.pth that later fails inside torch.load.
            if "text/html" in response.headers.get("Content-Type", ""):
                return False
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
    except requests.RequestException:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        return False
    # Rename only after a complete download so a partial file is never
    # mistaken for a usable checkpoint.
    os.replace(tmp_path, "model.pth")
    return True
19
+
20
# Fetch the checkpoint when it is not already on disk.
if not os.path.exists("model.pth"):
    if not load_model_from_drive():
        raise FileNotFoundError("無法從 Google Drive 載入 model.pth")

# Build the classifier, load the trained weights onto the selected device and
# switch to inference mode.
model = BertLSTM_CNN_Classifier()
model.load_state_dict(torch.load("model.pth", map_location=device))
model.to(device)
model.eval()

# Same pretrained name that the classifier's BERT encoder is loaded from.
tokenizer = BertTokenizer.from_pretrained("ckiplab/bert-base-chinese")
30
+
31
def predict_single_sentence(model, tokenizer, sentence, max_len=256):
    """Classify one message and print a short report.

    Args:
        model: Classifier called as ``model(input_ids, attention_mask,
            token_type_ids)``; its single output value is read as a probability.
        tokenizer: Tokenizer callable returning those three tensors.
        sentence: Raw message text.
        max_len: Length the encoded message is padded/truncated to.

    Returns:
        ``(label, probability, risk)``: the predicted label string, the raw
        probability and a risk-level description.
    """
    # NOTE(review): BertPreprocessor in AI_Model_architecture defaults to
    # max_len=128 and a narrower character whitelist than the one below.
    # Confirm the checkpoint was trained with settings that match these.
    model.eval()

    # Remove all whitespace, then every character outside the whitelist.
    sentence = re.sub(r"\s+", "", sentence)
    sentence = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9。,!?:/.\-]", "", sentence)

    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_len,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(
            encoded["input_ids"].to(device),
            encoded["attention_mask"].to(device),
            encoded["token_type_ids"].to(device),
        )
    prob = output.item()

    # Above 0.5 is labelled a scam; above 0.9 is additionally high risk.
    is_scam = prob > 0.5
    if not is_scam:
        risk = "🟢 低風險(正常)"
    elif prob > 0.9:
        risk = "🔴 高風險(極可能是詐騙)"
    else:
        risk = "🟡 中風險(可疑)"

    pre_label = "詐騙" if is_scam else "正常"

    print(f"\n📩 訊息內容:{sentence}")
    print(f"✅ 預測結果:{pre_label}")
    print(f"📊 信心值:{round(prob*100, 2)}")
    print(f"⚠️ 風險等級:{risk}")
    return pre_label, prob, risk
60
+
61
def analyze_text(text):
    """Run the module-level model on *text* and return an API-ready dict.

    ``confidence`` is the probability as a percentage rounded to two decimals;
    ``suspicious_keywords`` holds the risk-level description as its only item.
    """
    label, prob, risk = predict_single_sentence(model, tokenizer, text)
    confidence_pct = round(prob*100, 2)
    result = {
        "status": label,
        "confidence": confidence_pct,
        "suspicious_keywords": [risk],
    }
    return result
index.html ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="zh-Hant">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>預測詐騙訊息</title>
7
+ <link rel="stylesheet" href="style.css">
8
+ </head>
9
+ <body>
10
+ <h1>檢查可疑訊息</h1>
11
+
12
+ <div class="main-container">
13
+ <!-- 使用者輸入 -->
14
+ <section id="input_area" class="panel">
15
+ <textarea id="predict_info" placeholder="請輸入內容 (最多5000字)" maxlength="5000"></textarea>
16
+ <div class="button-group">
17
+ <button id="detect_button" type="submit">檢測!</button>
18
+ <button id="clear_button" type="reset">清除</button>
19
+ </div>
20
+ </section>
21
+
22
+ <!-- 模型預測結果 + 使用者回饋(已合併) -->
23
+ <section id="output_area" class="panel">
24
+ <h2>檢測結果</h2>
25
+ <p><strong>是否為詐騙訊息:</strong> <span id="is_scam">待檢測</span></p>
26
+ <p><strong>模型預測可疑度:</strong> <span id="confidence_score">待檢測</span></p>
27
+ <p><strong>可疑詞句分析:</strong></p>
28
+ <div id="suspicious_phrases">
29
+ <p>請輸入訊息並點擊「檢測!」按鈕。</p>
30
+ </div>
31
+
32
+ <!-- ✅ 使用者回饋區塊放在 output_area 內 -->
33
+ <section id="feedback_area" style="display: none;">
34
+ <p><strong>這筆預測結果正確嗎?</strong></p>
35
+ <div class="button-group">
36
+ <button id="feedback_correct">✅ 正確</button>
37
+ <button id="feedback_wrong">❌ 錯誤</button>
38
+ </div>
39
+ <p id="feedback_status" style="color: green;"></p>
40
+ </section>
41
+ </section>
42
+ </div>
43
+
44
+ <script src="script.js"></script>
45
+ </body>
46
+ </html>
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
python-multipart
pydantic
firebase-admin
torch==2.2.0
transformers
pytz
requests
# Imported at module level by AI_Model_architecture.py, which app.py and
# bert_explainer.py import; without them the app fails on start-up.
# torch 2.2 wheels were built against NumPy 1.x, hence the upper bound.
numpy<2
pandas
scikit-learn
tqdm
script.js ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /// script.js
2
// Wires up the scam-check page once the DOM is ready: sends the text to the
// prediction API, renders the result, and lets the user submit feedback on it.
document.addEventListener('DOMContentLoaded', () => {
    const inputTextArea = document.getElementById('predict_info');
    const inputButton = document.getElementById('detect_button');
    const clearButton = document.getElementById('clear_button');
    const normalOrScam = document.getElementById('is_scam');
    const confidenceScoreSpan = document.getElementById('confidence_score');
    const suspiciousPhrasesDiv = document.getElementById('suspicious_phrases');
    const feedbackArea = document.getElementById('feedback_area');
    const feedbackCorrectBtn = document.getElementById('feedback_correct');
    const feedbackWrongBtn = document.getElementById('feedback_wrong');
    const feedbackStatus = document.getElementById('feedback_status');

    // Most recent successful prediction; sent along with the user's feedback.
    let lastPrediction = null;

    // Relative paths (the original author notes Vercel resolves them automatically).
    const API_URL = '/predict';
    const FEEDBACK_API = '/feedback';

    // "Detect" button: validate the input, call the API, render the outcome.
    inputButton.addEventListener('click', async () => {
        const message = inputTextArea.value.trim();
        if (!message) {
            alert('請輸入您想檢測的訊息內容。');
            return;
        }

        // Show a pending state and hide any feedback UI from a previous run.
        normalOrScam.textContent = '檢測中...';
        normalOrScam.style.color = 'gray';
        confidenceScoreSpan.textContent = '計算中...';
        suspiciousPhrasesDiv.innerHTML = '<p>正在分析訊息,請稍候...</p>';
        feedbackArea.style.display = 'none';
        feedbackStatus.textContent = '';
        feedbackCorrectBtn.style.display = 'inline-block';
        feedbackWrongBtn.style.display = 'inline-block';

        try {
            const response = await fetch(API_URL, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ text: message }),
            });

            if (!response.ok) throw new Error(`伺服器錯誤: ${response.status} ${response.statusText}`);
            const data = await response.json();

            updateResults(data.status, data.confidence, data.suspicious_keywords);
            feedbackArea.style.display = 'block';
            lastPrediction = { text: message, model_status: data.status };
        } catch (error) {
            console.error('訊息檢測失敗:', error);
            alert(`訊息檢測失敗,請檢查後端服務。\n錯誤詳情: ${error.message}`);
            resetResults();
        }
    });

    // "Clear" button: empty the input and restore the initial result panel.
    clearButton.addEventListener('click', () => {
        inputTextArea.value = '';
        resetResults();
        feedbackArea.style.display = 'none';
        feedbackStatus.textContent = '';
    });

    feedbackCorrectBtn.addEventListener('click', () => submitFeedback('正確'));
    feedbackWrongBtn.addEventListener('click', () => submitFeedback('錯誤'));

    // Sends the user's verdict on the last prediction to the feedback API.
    async function submitFeedback(user_feedback) {
        if (!lastPrediction) return;
        const payload = { ...lastPrediction, user_feedback };
        try {
            const res = await fetch(FEEDBACK_API, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(payload),
            });
            // fetch() rejects only on network failure, so an HTTP error status
            // has to be checked explicitly before thanking the user.
            if (!res.ok) throw new Error(`伺服器錯誤: ${res.status} ${res.statusText}`);
            feedbackStatus.style.color = 'green';
            feedbackStatus.textContent = '✅ 感謝你的回饋!';
            feedbackCorrectBtn.style.display = 'none';
            feedbackWrongBtn.style.display = 'none';
        } catch (e) {
            console.error('回饋提交失敗:', e);
            // The status element is green by default; show failures in red.
            feedbackStatus.style.color = 'red';
            feedbackStatus.textContent = '❌ 回饋提交失敗';
        }
    }

    // Renders a prediction: label, confidence and the list of flagged phrases.
    function updateResults(isScam, confidence, suspiciousParts) {
        normalOrScam.textContent = isScam;
        // Undo the gray "pending" colour set while the request was in flight.
        normalOrScam.style.color = 'inherit';
        // The API reports confidence as a percentage.
        confidenceScoreSpan.textContent = `${confidence}%`;
        suspiciousPhrasesDiv.innerHTML = '';
        if (suspiciousParts && suspiciousParts.length > 0) {
            const ul = document.createElement('ul');
            suspiciousParts.forEach(phrase => {
                const li = document.createElement('li');
                // textContent (not innerHTML) so server text is never parsed as markup.
                li.textContent = phrase;
                ul.appendChild(li);
            });
            suspiciousPhrasesDiv.appendChild(ul);
        } else {
            suspiciousPhrasesDiv.innerHTML = '<p>沒有偵測到特別可疑的詞句。</p>';
        }
    }

    // Restores the result panel to its initial "waiting" state.
    function resetResults() {
        normalOrScam.textContent = '待檢測';
        normalOrScam.style.color = 'inherit';
        confidenceScoreSpan.textContent = '待檢測';
        suspiciousPhrasesDiv.innerHTML = '<p>請輸入訊息並點擊「檢測!」按鈕。</p>';
    }
});
style.css ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* style.css */
2
+
3
+ body {
4
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
5
+ margin: 0; /* 將 body 的 margin 設為 0,讓內容可以更貼近邊緣 */
6
+ padding: 20px; /* 內邊距留點空間 */
7
+ background-color: #f4f7f6;
8
+ color: #333;
9
+ line-height: 1.6;
10
+ display: flex; /* 讓 body 成為 flex 容器 */
11
+ flex-direction: column; /* 內容垂直排列 */
12
+ min-height: 100vh; /* 讓 body 至少佔滿整個視窗高度 */
13
+ align-items: center; /* 讓 h1 居中 */
14
+ }
15
+
16
+ h1 {
17
+ color: #2c3e50;
18
+ text-align: center;
19
+ margin-bottom: 30px; /* 增加標題下方的間距 */
20
+ font-size: 2.5em; /* 讓標題更大一點 */
21
+ }
22
+
23
+ h2 { /* 針對檢測結果的 h2 */
24
+ color: #2c3e50;
25
+ text-align: center;
26
+ margin-top: 0; /* 移除頂部 margin,讓它更靠近 panel 頂部 */
27
+ margin-bottom: 20px;
28
+ font-size: 1.8em;
29
+ }
30
+
31
+ /* --- 主容器 Flexbox 佈局 --- */
32
+ .main-container {
33
+ display: flex; /* 啟用 Flexbox */
34
+ flex-direction: row; /* 預設就是 row,讓子元素水平排列 */
35
+ gap: 30px; /* 左右兩個 panel 之間的間距 */
36
+ width: 100%; /* 佔滿可用寬度 */
37
+ max-width: 1200px; /* 設定最大寬度,避免在寬螢幕上過於分散 */
38
+ justify-content: center; /* 內容居中 */
39
+ flex-wrap: wrap; /* 當螢幕太小時,允許換行 */
40
+ }
41
+
42
+ .panel {
43
+ background-color: #ffffff;
44
+ padding: 30px; /* 增加內邊距 */
45
+ border-radius: 8px;
46
+ box-shadow: 0 6px 12px rgba(0, 0, 0, 0.1); /* 更明顯的陰影 */
47
+ flex: 1; /* 讓兩個 panel 平均分配空間 */
48
+ min-width: 380px; /* 設定每個 panel 的最小寬度,避免縮得太小 */
49
+ box-sizing: border-box; /* 確保 padding 和 border 不會增加元素總寬度 */
50
+ display: flex; /* 讓 panel 內部內容也是 flex 容器 */
51
+ flex-direction: column; /* 內部內容垂直排列 */
52
+ }
53
+
54
+ #input_area {
55
+ /* 特定於 input_area 的樣式,如果需要 */
56
+ align-items: center; /* 讓輸入框和按鈕在 input_area 中居中 */
57
+ }
58
+
59
+
60
+
61
+ textarea {
62
+ width: 100%; /* 佔滿 panel 寬度 */
63
+ height: 250px; /* 增加高度 */
64
+ padding: 15px;
65
+ margin-bottom: 25px; /* 增加與按鈕的間距 */
66
+ border: 1px solid #ddd;
67
+ border-radius: 5px;
68
+ font-size: 1.1rem; /* 稍微放大字體 */
69
+ box-sizing: border-box;
70
+ resize: vertical;
71
+ outline: none; /* 移除 focus 時的藍色邊框 */
72
+ transition: border-color 0.3s ease;
73
+ }
74
+
75
+ textarea:focus {
76
+ border-color: #4CAF50; /* focus 時邊框變色 */
77
+ }
78
+
79
+
80
+ .button-group {
81
+ display: flex;
82
+ gap: 20px; /* 按鈕間距 */
83
+ justify-content: center; /* 按鈕在 group 內部居中 */
84
+ width: 100%; /* 佔滿寬度 */
85
+ }
86
+
87
+ button {
88
+ padding: 12px 30px; /* 稍微增加按鈕大小 */
89
+ font-size: 1.1rem;
90
+ cursor: pointer;
91
+ border: none;
92
+ border-radius: 5px;
93
+ transition: background-color 0.3s ease, transform 0.2s ease; /* 增加 transform 過渡效果 */
94
+ font-weight: bold; /* 字體加粗 */
95
+ }
96
+
97
+ button[type="submit"] {
98
+ background-color: #4CAF50;
99
+ color: white;
100
+ }
101
+
102
+ button[type="submit"]:hover {
103
+ background-color: #45a049;
104
+ transform: translateY(-2px); /* 懸停時向上輕微移動 */
105
+ }
106
+
107
+ button[type="reset"] {
108
+ background-color: #f44336;
109
+ color: white;
110
+ }
111
+
112
+ button[type="reset"]:hover {
113
+ background-color: #da190b;
114
+ transform: translateY(-2px);
115
+ }
116
+
117
+
118
+ #output_area p {
119
+ font-size: 1.15rem; /* 稍微放大結果文字 */
120
+ margin-bottom: 12px;
121
+ }
122
+
123
+ #output_area strong {
124
+ color: #555;
125
+ font-weight: bold;
126
+ }
127
+
128
+ #is_scam, #confidence_score {
129
+ font-weight: bold; /* 結果狀態字體加粗 */
130
+ }
131
+
132
+ #suspicious_phrases {
133
+ background-color: #fffafa; /* 給可疑詞句區塊一個淺色背景 */
134
+ border: 1px dashed #e0baba; /* 虛線邊框 */
135
+ padding: 15px;
136
+ border-radius: 5px;
137
+ margin-top: 15px;
138
+ min-height: 80px; /* 確保高度,避免內容少時高度變化 */
139
+ }
140
+
141
+ #suspicious_phrases ul {
142
+ list-style-type: '🚨 '; /* 使用表情符號作為列表標記 */
143
+ padding-left: 20px;
144
+ margin: 0; /* 移除預設 margin */
145
+ }
146
+
147
+ #suspicious_phrases li {
148
+ margin-bottom: 8px;
149
+ color: #c0392b;
150
+ font-weight: 500;
151
+ }
152
+
153
+ #suspicious_phrases p {
154
+ font-style: italic;
155
+ color: #666;
156
+ margin: 0; /* 移除預設 margin */
157
+ }
158
+
159
+ /* --- 響應式設計:當螢幕較小時,垂直排列 --- */
160
+ @media (max-width: 768px) {
161
+ .main-container {
162
+ flex-direction: column; /* 小螢幕時改為垂直堆疊 */
163
+ gap: 20px; /* 垂直間距 */
164
+ padding: 0 15px; /* 左右邊距 */
165
+ }
166
+
167
+ .panel {
168
+ flex: none; /* 取消 flex 比例,讓他們各自佔據 100% 寬度 */
169
+ width: 100%;
170
+ max-width: none; /* 移除最大寬度限制 */
171
+ }
172
+
173
+ h1 {
174
+ font-size: 2em;
175
+ }
176
+
177
+ h2 {
178
+ font-size: 1.5em;
179
+ }
180
+
181
+ textarea {
182
+ height: 200px;
183
+ }
184
+ }
test_firebase.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import firebase_admin
2
+ from firebase_admin import credentials, firestore
3
+ from datetime import datetime
4
+
5
def test_firebase_connection():
    """Write a test document to Firestore, read it back and print the outcome."""
    try:
        # Initialise Firebase from the local service-account file.
        firebase_admin.initialize_app(credentials.Certificate("firebase-credentials.json"))
        db = firestore.client()

        # Payload for the round trip.
        test_data = {"test_field": "測試資料", "timestamp": datetime.now()}

        # Write the document, then read the same document back.
        doc_ref = db.collection('test').document('test_doc')
        doc_ref.set(test_data)
        doc = doc_ref.get()

        if not doc.exists:
            print("無法讀取測試資料")
        else:
            print("Firebase 連接測試成功!")
            print("測試資料:", doc.to_dict())

    except Exception as e:
        # Any failure is reported rather than raised: this is a manual check.
        print(f"Firebase 連接測試失敗:{str(e)}")


if __name__ == "__main__":
    test_firebase_connection()
test_model_load.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from AI_Model_architecture import BertLSTM_CNN_Classifier
3
+
4
# Smoke check: build the classifier and load the saved weights on the CPU,
# printing either a success message or the error text.
try:
    print("🚀 嘗試載入模型...")
    state_dict = torch.load("model.pth", map_location="cpu")
    model = BertLSTM_CNN_Classifier()
    model.load_state_dict(state_dict)
    print("✅ 模型成功載入!")
except Exception as e:
    print("❌ 錯誤訊息:", str(e))