# toxic_comment/app.py
import os
# Quiet TensorFlow's C++ logging; set before any TF-backed import so it takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import gdown
import numpy as np
import torch
from flask import Flask, request, jsonify
from underthesea import word_tokenize

from classify_model import model, tokenizer
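
# Fetch the trained weights from Google Drive on first run; they are not stored in the repo.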
if not os.path.exists("save_weights.pt"):
    file_id = "1JeQ100QELbCCjCozF5SsHT1ca08Vvfuw"
    url = f"https://drive.google.com/uc?id={file_id}"
    print("Downloading save_weights.pt from Google Drive...")
    gdown.download(url, "save_weights.pt", quiet=False)
    if not os.path.exists("save_weights.pt"):
        raise FileNotFoundError("Failed to download save_weights.pt")

# map_location="cpu" lets a GPU-trained checkpoint load on CPU-only hosts.
model.load_state_dict(torch.load("save_weights.pt", map_location="cpu"))
model.eval()

app = Flask(__name__)

def predict_toxic(sentence):
    # Word-segment the Vietnamese input (this replaced an earlier rdrsegmenter step).
    sentence = word_tokenize(sentence, format="text")
    sequence = tokenizer.encode(sentence)
    # Left-pad the token ids with zeros up to the model's fixed input length of 20.
    while len(sequence) < 20:
        sequence.insert(0, 0)
    padded = torch.tensor([sequence])
    with torch.no_grad():
        preds = model(padded)
    # Pick the highest-scoring class from the model's output.
    preds = np.argmax(preds.cpu().numpy(), axis=1)
    return preds[0]
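
# Note (assumption): model(padded) is taken to return class scores of shape
# (1, num_classes), so the argmax above is the predicted label, with 1
# presumably meaning "toxic" given how /predict reports it.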

@app.route('/predict', methods=['POST'])
def predict():
    # get_json(silent=True) returns None instead of raising on a missing or malformed JSON body.
    data = request.get_json(silent=True) or {}
    sentence = data.get('sentence', '')
    if not sentence:
        return jsonify({'error': 'No sentence provided'}), 400
    result = predict_toxic(sentence)
    print("Prediction:", result, type(result))
    return jsonify({'toxic': int(result)})

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 5000))
    app.run(host="0.0.0.0", port=port)
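
# Minimal usage sketch (assumes the server is running locally on the default port 5000):
#   import requests
#   resp = requests.post("http://localhost:5000/predict",
#                        json={"sentence": "a comment to screen"})
#   print(resp.json())  # e.g. {"toxic": 1}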