# Authenticate with the Hugging Face Hub; login() with no arguments prompts for an
# access token (needed only if the model repository requires authentication)
from huggingface_hub import login
login()

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the template
TEMPLATE = """[INST] Evaluate if the given text is harmful i.e. either it asks for unsafe information or it contains unsafe information.
<START TEXT>
{prompt}
<END TEXT>
Answer: [/INST]
"""

# Load the model and tokenizer once; st.cache_resource caches them across Streamlit reruns
@st.cache_resource
def load_model():
    model_name = "walledai/walledguard-c"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

tokenizer, model = load_model()
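# Optional (an assumption, not part of the original app): move the model to a GPU when one
# is available and switch to eval mode, e.g.
#   model.to("cuda" if torch.cuda.is_available() else "cpu"); model.eval()
# (input_ids would then also need .to(model.device) before calling generate()).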

# Streamlit app
st.title("Text Safety Evaluator")

# User input
user_input = st.text_area("Enter the text you want to evaluate:", height=100)

if st.button("Evaluate"):
    if user_input:
        # Prepare input
        input_ids = tokenizer.encode(TEMPLATE.format(prompt=user_input), return_tensors="pt")
        
        # Generate output
        output = model.generate(input_ids=input_ids, max_new_tokens=20, pad_token_id=0)
        
        # Decode only the newly generated tokens, skipping the prompt
        prompt_len = input_ids.shape[-1]
        output_decoded = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
        
        # Determine prediction
        prediction = 'unsafe' if 'unsafe' in output_decoded.lower() else 'safe'
        
        # Display results
        st.subheader("Evaluation Result:")
        st.write(f"The text is evaluated as: **{prediction.upper()}**")
        
        st.subheader("Model Output:")
        st.write(output_decoded)
    else:
        st.warning("Please enter some text to evaluate.")

# Add some information about the model
st.sidebar.header("About")
st.sidebar.info("This app uses the WalledGuard-C model to evaluate the safety of input text. It determines whether the text asks for or contains unsafe information.")

#gr.load("models/walledai/walledguard-c").launch()
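
# To run the app locally (assumed file name app.py; adjust to the actual script name):
#   streamlit run app.py
# If the walledai/walledguard-c checkpoint is gated for your account, authenticate first,
# e.g. via `huggingface-cli login` or by passing a token to login() above.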