codingcoolfun9ed committed on
Commit
c985b4c
·
verified ·
1 Parent(s): 4e21cfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -151
app.py CHANGED
@@ -1,159 +1,57 @@
1
- import gradio as gr
2
- import torch
3
- import torch.nn as nn
4
- import numpy as np
5
- import pickle
6
- import re
7
  import os
8
- from nltk.tokenize.toktok import ToktokTokenizer
9
 
10
class CoolLSTMClassifier(nn.Module):
    """Bidirectional LSTM text classifier on top of an embedding layer.

    Token ids are embedded, passed through a bidirectional LSTM, and the
    final hidden states at indices -2 and -1 are concatenated and projected
    to ``num_classes`` logits.
    """

    def __init__(self, vocabSize, embeddingDim, dimHidden, layerAmt, num_classes=2, dropout=0.3):
        """Create the layers.

        Args:
            vocabSize: Number of rows in the embedding table.
            embeddingDim: Size of each embedding vector.
            dimHidden: LSTM hidden size per direction.
            layerAmt: Number of stacked LSTM layers.
            num_classes: Number of output logits.
            dropout: Dropout probability before the final linear layer, and
                between LSTM layers when ``layerAmt > 1``.
        """
        super().__init__()
        self.dimHidden = dimHidden

        # Row 0 of the embedding table is the padding index.
        self.embedding = nn.Embedding(vocabSize, embeddingDim, padding_idx=0)
        # Fixed at 0.3; this layer does not follow the ``dropout`` argument.
        self.embedding_dropout = nn.Dropout(0.3)

        # Inter-layer LSTM dropout is only enabled for stacked LSTMs.
        inter_layer_dropout = dropout if layerAmt > 1 else 0
        self.lstm = nn.LSTM(
            input_size=embeddingDim,
            hidden_size=dimHidden,
            num_layers=layerAmt,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )

        self.dropout = nn.Dropout(dropout)
        # Twice the hidden size: two hidden states are concatenated.
        self.fc = nn.Linear(dimHidden * 2, num_classes)

    def forward(self, x):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids; assumed to be (batch, seq)
                because the LSTM is built with ``batch_first=True``.

        Returns:
            Tensor of logits with one row per batch element.
        """
        embedded = self.embedding_dropout(self.embedding(x))
        _, (hidden, _) = self.lstm(embedded)
        # The last two entries of the hidden state: forward then backward.
        features = torch.cat((hidden[-2], hidden[-1]), dim=1)
        return self.fc(self.dropout(features))
40
-
41
# Inference device: CUDA when available, otherwise CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# One tokenizer instance shared by all calls.
tokenizer = ToktokTokenizer()

# Filled in by load_resources(); None means "not loaded yet".
vocab = models = embeddingMatrix = None
47
 
48
def load_resources():
    """Load the vocabulary, embedding matrix and the five ensemble models.

    Does nothing when ``vocab`` and ``models`` are already populated.
    Everything is loaded into local variables first and the module globals
    are assigned only after every model has loaded. A failure part-way
    through (e.g. a missing checkpoint) therefore cannot leave a partial
    ensemble behind that later calls would mistake for a complete one.

    Raises:
        Whatever ``open``, ``pickle.load``, ``np.load``, ``torch.load`` or
        ``load_state_dict`` raise for missing or incompatible files.
    """
    global vocab, models, embeddingMatrix

    if vocab is not None and models is not None:
        return

    print("loading vocab and models...")

    # NOTE(security): pickle can execute arbitrary code while loading.
    # Only ever point this at a trusted, locally produced file.
    with open('data/processed/vocab.pkl', 'rb') as f:
        loaded_vocab = pickle.load(f)

    loaded_matrix = np.load('data/processed/embedding_matrix.npy')
    # Converted once; the same tensor is copied into every model below.
    pretrained = torch.from_numpy(loaded_matrix)

    vocabSize = len(loaded_vocab)
    embeddingDim = 300
    dimHidden = 96
    layerAmt = 1
    num_classes = 2
    dropout = 0.5

    loaded_models = []
    for i in range(1, 6):
        model = CoolLSTMClassifier(vocabSize, embeddingDim, dimHidden, layerAmt, num_classes, dropout)
        model.load_state_dict(torch.load(f'models/ensemble_model_{i}.pth', map_location=device))
        # The embedding weights are overwritten with the saved matrix and frozen.
        model.embedding.weight.data.copy_(pretrained)
        model.embedding.weight.requires_grad = False
        model = model.to(device)
        model.eval()
        loaded_models.append(model)

    # Publish only now that every resource loaded successfully.
    vocab = loaded_vocab
    embeddingMatrix = loaded_matrix
    models = loaded_models

    print("models loaded")
 
 
79
 
80
def cleanText(text):
    """Strip HTML-like tags from ``text`` and collapse whitespace.

    Args:
        text: Raw review text; non-string values are converted with ``str``.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if text:
        without_tags = re.sub(r'<[^>]+>', '', str(text))
        # split()/join collapses whitespace runs and trims both ends.
        return ' '.join(without_tags.split())
    return ""
87
-
88
def cleanTokenize(text):
    """Lower-case ``text``, drop unwanted characters, and tokenize it.

    Only ``a-z``, ``0-9`` and whitespace survive the filter; the result is
    passed to the module-level ``tokenizer``.

    Args:
        text: Text to tokenize; converted with ``str`` first.

    Returns:
        Whatever ``tokenizer.tokenize`` returns for the filtered text.
    """
    lowered = str(text).lower()
    filtered = re.sub(r'[^a-z0-9\s]', '', lowered)
    return tokenizer.tokenize(filtered)
93
-
94
def predict_review(text):
    """Classify a review as fake, real or uncertain with the model ensemble.

    The text is cleaned, tokenized, mapped to vocabulary ids, and padded or
    truncated to 256 ids. The softmax outputs of all loaded models are then
    averaged. Index 1 of the averaged probabilities is read as "fake" and
    index 0 as "real".

    Args:
        text: Raw review text.

    Returns:
        A tuple ``(prediction, confidence, probabilities)``: the label
        string, the larger of the two probabilities as a float, and a
        formatted string with both. Input that yields no tokens returns
        ``("invalid input", 0.0, "n/a")``.
    """
    load_resources()

    tokens = cleanTokenize(cleanText(text))
    if len(tokens) == 0:
        return "invalid input", 0.0, "n/a"

    # Out-of-vocabulary tokens map to the <UNK> id.
    unknownId = vocab['<UNK>']
    indices = [vocab.get(tok, unknownId) for tok in tokens]

    maxLen = 256
    if len(indices) <= maxLen:
        # Right-pad short inputs up to the fixed length.
        indices = indices + [vocab['<PAD>']] * (maxLen - len(indices))
    else:
        indices = indices[:maxLen]

    # Batch of one sequence.
    inpTensor = torch.tensor([indices], dtype=torch.long).to(device)

    with torch.no_grad():
        allOutputs = [
            torch.softmax(member(inpTensor), dim=1).cpu().numpy()
            for member in models
        ]

    # Average across models, then take the single batch row.
    avgProbs = np.mean(allOutputs, axis=0)[0]
    realProb = avgProbs[0]
    fakeProb = avgProbs[1]

    confidence = max(fakeProb, realProb)

    fakeThreshold = 0.75
    realThreshold = 0.75

    # A label is only assigned when its probability clears the threshold.
    prediction = "uncertain"
    if fakeProb >= fakeThreshold:
        prediction = "fake"
    elif realProb >= realThreshold:
        prediction = "real"

    return prediction, float(confidence), f"fake: {fakeProb:.3f}, real: {realProb:.3f}"
137
 
138
# Gradio UI: one text box in; label, confidence and probabilities out.
demo = gr.Interface(
    title="sentinelcheck",
    description="fake review detector using ensemble lstm models (75% threshold)",
    fn=predict_review,
    inputs=gr.Textbox(
        label="review text",
        placeholder="paste review text here",
        lines=5,
    ),
    # Order matches the tuple returned by predict_review.
    outputs=[
        gr.Textbox(label="prediction"),
        gr.Number(label="confidence"),
        gr.Textbox(label="probabilities"),
    ],
    examples=[
        ["this product is absolutely amazing! i received it for free and it changed my life completely. five stars!"],
        ["decent quality for the price. took about a week to arrive. works as expected."],
    ],
)

# Launch the UI only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
 
 
1
+ from flask import Flask, request, jsonify
2
+ from flask_cors import CORS
 
 
 
 
3
  import os
4
+ import sys
5
 
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+ from api.predict import predict_review, load_resources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# Flask application with CORS enabled on it.
app = Flask(__name__)
CORS(app)

# Best-effort preload at import time: a failure is reported but does not
# stop the server from starting.
print("loading models on startup...")
try:
    load_resources()
    print("models loaded")
except Exception as exc:
    print(f"couldnt preload models: {exc}")
18
 
19
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always responds 200 with ``{"status": "ok"}``."""
    return jsonify(status="ok"), 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the review passed as ``{"text": "..."}`` in a JSON body.

    Returns:
        200 with ``prediction``, ``confidence``, ``is_fake``,
        ``length_category`` and ``token_count`` on success.
        400 with ``{"error": ...}`` when the body is not a JSON object, has
        no ``text`` field, ``text`` is not a non-blank string, or the
        predictor itself reports an ``error``.
        500 with ``{"error": ...}`` when prediction raises unexpectedly.
    """
    # silent=True returns None for a wrong Content-Type or malformed JSON
    # instead of raising, so those client mistakes get the 400 below rather
    # than falling into the 500 handler.
    data = request.get_json(silent=True)

    # A JSON list or string would pass an ``in`` check and then fail on
    # indexing, so require an object explicitly.
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify({"error": "missing 'text' field"}), 400

    reviewText = data['text']

    if not isinstance(reviewText, str):
        return jsonify({"error": "'text' must be a string"}), 400

    if not reviewText.strip():
        return jsonify({"error": "text cannot be empty"}), 400

    try:
        result = predict_review(reviewText)

        if 'error' in result:
            return jsonify({"error": result['error']}), 400

        return jsonify({
            "prediction": result['prediction'],
            "confidence": result['confidence'],
            "is_fake": result['is_fake'],
            "length_category": result.get('length_category'),
            "token_count": result.get('token_count'),
        }), 200

    except Exception as e:
        # Keep the traceback server-side for diagnosis.
        app.logger.exception("prediction failed")
        # NOTE(review): str(e) exposes internal error text to the client;
        # consider a generic message if this API is publicly reachable.
        return jsonify({"error": str(e)}), 500
54
 
55
# Start the built-in Flask server only when executed directly.
if __name__ == '__main__':
    print("starting api server")
    # 0.0.0.0 listens on every network interface; debug mode stays off.
    app.run(debug=False, host='0.0.0.0', port=5000)