# Maxime Turlot
# fix classification
# 15a78c6
import gradio as gr
from transformers import pipeline
# Hugging Face Hub checkpoint used for both the model and its tokenizer.
MODEL_ID = "HagalazAI/RedSecureBERT"

# Cut-off compared against the LABEL_1 score when deciding "is_red".
THRESHOLD = 0.515

# Text-classification pipeline, built once when the module is imported.
#   * top_k=None                  -> return a score for every label,
#                                    not only the highest-scoring one
#   * function_to_apply="softmax" -> explicitly turn the logits into
#                                    probabilities
clf = pipeline(
    task="text-classification",
    model=MODEL_ID,
    tokenizer=MODEL_ID,
    function_to_apply="softmax",
    top_k=None,
)
def predict_offensive(text: str) -> dict:
    """Classify *text* and report whether it crosses the "red" threshold.

    Args:
        text: The prompt to score.

    Returns:
        A dict with two entries:

        * ``"P(offensive)"`` -- the score of ``LABEL_1`` formatted to three
          decimals (a string).
        * ``"is_red"`` -- ``True`` when the unrounded score is
          ``>= THRESHOLD``.

    Raises:
        ValueError: If the pipeline output contains no ``"LABEL_1"`` entry
            (for example because the checkpoint uses custom label names).
    """
    positive_label = "LABEL_1"

    # For a single string the pipeline returns a one-item batch; each item
    # is a list of {"label": ..., "score": ...} dicts, one per label, e.g.
    #   [[{"label": "LABEL_1", "score": 0.99},
    #     {"label": "LABEL_0", "score": 0.01}]]
    # so [0] selects the per-label list for our only input.
    preds = clf(text)[0]

    # Look the entry up by name rather than by position: the list is ordered
    # by score, so LABEL_1 is not always first.
    label_1_entry = next(
        (entry for entry in preds if entry["label"] == positive_label),
        None,
    )
    if label_1_entry is None:
        # A bare next() would surface as an unexplained StopIteration here;
        # fail with a message that names the labels actually returned.
        seen_labels = [entry["label"] for entry in preds]
        raise ValueError(
            f"Classifier returned no {positive_label!r} entry; "
            f"labels seen: {seen_labels}"
        )

    prob_offensive = float(label_1_entry["score"])
    # The decision uses the full-precision score, not the rounded display.
    is_red = prob_offensive >= THRESHOLD

    return {
        "P(offensive)": f"{prob_offensive:.3f}",
        "is_red": is_red,
    }
# Gradio UI: a two-line text box feeds predict_offensive, and the dict it
# returns is rendered through the "json" output component.
demo = gr.Interface(
    title="RedSecureBERT Demo",
    description=(
        f"This Space uses **{MODEL_ID}**.\n\n"
        f"**Threshold** for 'is_red' = {THRESHOLD}\n\n"
        "The model is a 2-class classifier: LABEL_0=Not offensive, LABEL_1=Offensive.\n"
    ),
    fn=predict_offensive,
    inputs=gr.Textbox(
        placeholder="Try an exploit-like prompt: e.g. 'Bypass an antivirus...'",
        lines=2,
    ),
    outputs="json",
    allow_flagging="never",
)

# Launch the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()