# Hugging Face Spaces status text captured along with the source;
# commented out so the file parses as Python.
# Spaces:
# Running
# Running
import os

# Silence TensorFlow C++ logging. This must be set BEFORE any module that
# imports TensorFlow is loaded (classify_model below may do so), so it is
# moved ahead of the other imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import gdown
import numpy as np
import torch
from flask import Flask, request, jsonify
from underthesea import word_tokenize

from classify_model import model, tokenizer

# Single source of truth for the checkpoint filename.
WEIGHTS_PATH = "save_weights.pt"

# Fetch the trained weights from Google Drive on first start.
if not os.path.exists(WEIGHTS_PATH):
    file_id = "1JeQ100QELbCCjCozF5SsHT1ca08Vvfuw"
    url = f"https://drive.google.com/uc?id={file_id}"
    print("Downloading save_weights.pt from Google Drive...")
    gdown.download(url, WEIGHTS_PATH, quiet=False)
    if not os.path.exists(WEIGHTS_PATH):
        raise FileNotFoundError("Failed to download save_weights.pt")

# map_location="cpu" so a checkpoint saved on a GPU machine still loads on a
# CPU-only host (typical for Spaces); without it torch.load raises.
# NOTE(review): torch.load unpickles arbitrary objects and this file comes
# from an external Drive link — consider weights_only=True on torch >= 2.0.
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location="cpu"))
model.eval()

app = Flask(__name__)
def predict_toxic(sentence):
    """Classify a sentence and return the predicted class index.

    Word-segments the Vietnamese input, encodes it with the tokenizer,
    left-pads the token ids to length 20, and returns the argmax over the
    model's output classes.

    Args:
        sentence: Raw input text (str).

    Returns:
        numpy integer — index of the most probable class (0/1 per the
        model's label order).
    """
    # Vietnamese word segmentation; format="text" returns a plain string.
    sentence = word_tokenize(sentence, format="text")
    sequence = tokenizer.encode(sentence)
    # BUG FIX: the original condition was `== 20`, which never padded short
    # sequences (and padded an exactly-20 one to 21). Left-pad with 0s up
    # to the model's expected length of 20.
    # NOTE(review): sequences longer than 20 still pass through unpadded,
    # as in the original — confirm the model tolerates variable length.
    while len(sequence) < 20:
        sequence.insert(0, 0)
    padded = torch.tensor([sequence])
    with torch.no_grad():
        preds = model(padded)
    preds = np.argmax(preds.cpu().numpy(), axis=1)
    return preds[0]
@app.route("/predict", methods=["POST"])
def predict():
    """HTTP endpoint: classify the JSON field ``sentence`` as toxic or not.

    Expects a JSON body like ``{"sentence": "..."}`` and responds with
    ``{"toxic": 0|1}``, or HTTP 400 when the sentence is missing or the
    body is not valid JSON.
    """
    # NOTE(review): the original had no @app.route decorator, so this view
    # was never registered — it appears to have been lost; the path
    # "/predict" is an assumption to confirm against the client.
    # get_json(silent=True) returns None instead of raising on a non-JSON
    # body, so the 400 branch below is actually reachable.
    data = request.get_json(silent=True) or {}
    sentence = data.get('sentence', '')
    if not sentence:
        return jsonify({'error': 'No sentence provided'}), 400
    result = predict_toxic(sentence)
    print("Dự đoán:", result, type(result))
    return jsonify({'toxic': int(result)})
if __name__ == "__main__":
    # Honor a platform-assigned PORT (e.g. on hosting services); fall back
    # to 5000 for local development.
    listen_port = int(os.environ.get("PORT", 5000))
    app.run(host="0.0.0.0", port=listen_port)