Spaces:

HagalazAI
/

redsecurebert-demo

Sleeping

Maxime Turlot

fix classification

15a78c6 4 months ago

2.08 kB

	import gradio as gr
	from transformers import pipeline

	# Identifier handed to the pipeline as both the model and the tokenizer.
	MODEL_ID = "HagalazAI/RedSecureBERT"
	# Minimum LABEL_1 probability at which predict_offensive reports is_red=True.
	THRESHOLD = 0.515

	# Options for the shared text-classification pipeline, kept in one place so
	# each choice can be explained next to its value.
	_PIPELINE_KWARGS = {
	    "model": MODEL_ID,
	    "tokenizer": MODEL_ID,
	    # Return a score for every label rather than only the best-scoring one.
	    "top_k": None,
	    # Ask explicitly for softmax so the scores are probabilities, instead of
	    # relying on whatever the pipeline would pick by default.
	    "function_to_apply": "softmax",
	}

	# Built once at import time and reused for every request.
	clf = pipeline("text-classification", **_PIPELINE_KWARGS)


	def predict_offensive(text):
	    """Classify ``text`` and report whether it crosses the "red" threshold.

	    Args:
	        text: The prompt to score, passed unchanged to the ``clf`` pipeline.

	    Returns:
	        A dict with two keys:

	        * ``"P(offensive)"``: the ``LABEL_1`` score formatted as a string
	          with three decimals.
	        * ``"is_red"``: ``True`` when that score is ``>= THRESHOLD``.

	    Raises:
	        RuntimeError: If the pipeline output has no ``"LABEL_1"`` entry.
	    """
	    positive_label = "LABEL_1"

	    # For a single string the pipeline result is indexed with [0] to get the
	    # per-label entries, which are expected to look like:
	    #   [{"label": "LABEL_1", "score": 0.99}, {"label": "LABEL_0", "score": 0.01}]
	    label_scores = clf(text)[0]

	    # Select the entry by label name instead of by position, so the result
	    # does not depend on the order in which the pipeline lists the labels.
	    # A default is passed to next() because a bare next() would surface a
	    # message-less StopIteration when the label is missing.
	    positive_entry = next(
	        (entry for entry in label_scores if entry["label"] == positive_label),
	        None,
	    )
	    if positive_entry is None:
	        seen_labels = [entry["label"] for entry in label_scores]
	        raise RuntimeError(
	            f"Classifier output has no {positive_label!r} entry; "
	            f"labels returned: {seen_labels}"
	        )

	    prob_offensive = float(positive_entry["score"])
	    return {
	        "P(offensive)": f"{prob_offensive:.3f}",
	        "is_red": prob_offensive >= THRESHOLD,
	    }


	# Two-line text input shown to the user, with an example prompt as a hint.
	_prompt_box = gr.Textbox(
	    lines=2,
	    placeholder="Try an exploit-like prompt: e.g. 'Bypass an antivirus...'",
	)

	# Text shown under the title; it repeats the model id and threshold so the
	# page always reflects the values the code actually uses.
	_description = (
	    f"This Space uses {MODEL_ID}.\n\n"
	    f"Threshold for 'is_red' = {THRESHOLD}\n\n"
	    "The model is a 2-class classifier: LABEL_0=Not offensive, LABEL_1=Offensive.\n"
	)

	# Wire the classifier function to the textbox and render its dict as JSON.
	demo = gr.Interface(
	    fn=predict_offensive,
	    inputs=_prompt_box,
	    outputs="json",
	    title="RedSecureBERT Demo",
	    description=_description,
	    allow_flagging="never",
	)

	# Start the web app only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()