|
import tensorflow as tf |
|
import gradio as gr |
|
import pandas as pd |
|
from transformers import AutoTokenizer |
|
|
|
# Directory containing the fine-tuned Keras classifier (SavedModel format).
model_save_path = "Multilingual_toxic_comment_classifier/"



# Load the trained toxicity model once at startup so every request reuses it.
loaded_model = tf.keras.models.load_model(model_save_path)



# Tokenizer must match the backbone the model was fine-tuned from
# (presumably xlm-roberta-large — verify against the training script).
tokenizer_ = AutoTokenizer.from_pretrained("xlm-roberta-large")



# Gradio examples: one single-element row per sample comment,
# taken from the "comment_text" column of the bundled CSV.
examples_list = [

    [example]

    for example in pd.read_csv("examples/sample_comments.csv")["comment_text"].tolist()

]
|
|
|
|
|
def prep_data(text, tokenizer, max_len=192):
    """Tokenize *text* into the input dict the Keras classifier expects.

    Args:
        text: Raw comment string (or batch of strings) to encode.
        tokenizer: Hugging Face tokenizer callable (e.g. from
            ``AutoTokenizer.from_pretrained``).
        max_len: Fixed sequence length; shorter inputs are padded to it,
            longer inputs are truncated.

    Returns:
        Dict with ``"input_ids"`` and ``"attention_mask"`` TensorFlow
        tensors of shape (batch, max_len).
    """
    encoded = tokenizer(
        text,
        max_length=max_len,
        truncation=True,
        padding="max_length",
        add_special_tokens=True,
        return_tensors="tf",
    )
    # Keep only the two fields the model's input signature consumes.
    return {name: encoded[name] for name in ("input_ids", "attention_mask")}
|
|
|
|
|
def predict(text):
    """Classify a comment and return toxic / non-toxic probabilities.

    Args:
        text: Raw comment string entered in the Gradio textbox.

    Returns:
        Dict mapping each class label to its probability as a Python
        float — the format ``gr.Label`` renders.
    """
    # The model emits a single sigmoid toxicity probability; predict()
    # returns shape (1, 1), so [0][0] extracts the scalar.
    prob_toxic = loaded_model.predict(
        prep_data(text=text, tokenizer=tokenizer_, max_len=192)
    )[0][0]
    # Fix: the original had a dead no-op expression statement here
    # ("prob_of_toxic_comment, prob_of_non_toxic_comment") that built
    # and discarded a tuple; it has been removed.
    return {
        "prob_of_toxic_comment": float(prob_toxic),
        "prob_of_non_toxic_comment": float(1 - prob_toxic),
    }
|
|
|
|
|
# Wire the prediction function into a simple Gradio UI:
# a multi-line textbox in, a label widget showing class probabilities out.
interface = gr.Interface(

    fn=predict,

    inputs=gr.components.Textbox(lines=4, label="Comment"),

    outputs=[gr.Label(label="Probabilities")],

    examples=examples_list,

    title="Multi-Lingual Toxic Comment Classification.",

    description="XLM-Roberta Large model",

)

# Start the local web server; debug=False suppresses verbose error traces.
interface.launch(debug=False)
|
|