import json

import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the fine-tuned tokenizer and model from the saved checkpoint directory.
model_path = "./bert_toxicity_final_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()  # ensure inference mode (dropout disabled)
with open(f"{model_path}/model_config.json", 'r') as f:
|
|
config = json.load(f)
|
|
|
|
MAX_LENGTH = config['max_length']
|
|
THRESHOLD = config['best_threshold']
|
|
|
|
def predict_toxicity(text):
|
|
"""
|
|
Predict toxicity for a single text input
|
|
Returns: (is_toxic: bool, toxicity_score: float)
|
|
"""
|
|
|
|
inputs = tokenizer(
|
|
text,
|
|
truncation=True,
|
|
padding=True,
|
|
max_length=MAX_LENGTH,
|
|
return_tensors="pt"
|
|
)
|
|
|
|
|
|
with torch.no_grad():
|
|
outputs = model(**inputs)
|
|
probabilities = torch.softmax(outputs.logits, dim=1)
|
|
toxicity_score = probabilities[0][1].item()
|
|
is_toxic = toxicity_score >= THRESHOLD
|
|
|
|
return is_toxic, toxicity_score
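

# Example usage: an illustrative sketch, not part of the original script.
# The sample sentences below are made-up inputs; replace them with your own text.
if __name__ == "__main__":
    samples = [
        "Have a great day!",
        "You are an idiot and nobody likes you.",
    ]
    for sample in samples:
        toxic, score = predict_toxicity(sample)
        print(f"toxic={toxic}  score={score:.4f}  text={sample!r}")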