import json

import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the fine-tuned tokenizer and model from the saved checkpoint directory.
model_path = "./bert_toxicity_final_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()  # ensure inference mode (dropout disabled)
with open(f"{model_path}/model_config.json", 'r') as f:
|
|
config = json.load(f)
|
|
|
|
MAX_LENGTH = config['max_length']
|
|
THRESHOLD = config['best_threshold']
|
|
|
|
def predict_toxicity(text):
|
|
"""
|
|
Predict toxicity for a single text input
|
|
Returns: (is_toxic: bool, toxicity_score: float)
|
|
"""
|
|
|
|
inputs = tokenizer(
|
|
text,
|
|
truncation=True,
|
|
padding=True,
|
|
max_length=MAX_LENGTH,
|
|
return_tensors="pt"
|
|
)
|
|
|
|
|
|
with torch.no_grad():
|
|
outputs = model(**inputs)
|
|
probabilities = torch.softmax(outputs.logits, dim=1)
|
|
toxicity_score = probabilities[0][1].item()
|
|
is_toxic = toxicity_score >= THRESHOLD
|
|
|
|
return is_toxic, toxicity_score
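

# Example usage: an illustrative sketch, not part of the original script.
# The sample sentences below are made-up inputs; replace them with your own text.
if __name__ == "__main__":
    samples = [
        "Have a great day!",
        "You are an idiot and nobody likes you.",
    ]
    for sample in samples:
        toxic, score = predict_toxicity(sample)
        print(f"toxic={toxic}  score={score:.4f}  text={sample!r}")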