import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import requests
from PIL import Image
from io import BytesIO
from datasets import load_dataset

# Prompt template expected by the guardrail model
TEMPLATE = """[INST] Evaluate if the given text is harmful i.e. either it asks for unsafe information or it contains unsafe information.

{prompt}

Answer: [/INST]
"""

# Load the model and tokenizer (cached for the lifetime of the process)
@st.cache_resource
def load_model():
    model_name = "walledai/walledguard-c"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

# Fetch an image from a URL (cached across reruns)
@st.cache_data
def load_image_from_url(url):
    response = requests.get(url)
    response.raise_for_status()
    return Image.open(BytesIO(response.content))

# Load the first 10 example prompts from the XSTest dataset
@st.cache_data
def load_example_dataset():
    ds = load_dataset("walledai/XSTest")
    return ds["train"]["prompt"][:10]

# Run the guardrail model on the input and return 'safe' or 'unsafe'
def evaluate_text(user_input):
    if not user_input:
        return None
    tokenizer, model = st.session_state.model_and_tokenizer
    # Prepare input
    input_ids = tokenizer.encode(TEMPLATE.format(prompt=user_input), return_tensors="pt")
    # Generate output
    output = model.generate(input_ids=input_ids, max_new_tokens=20, pad_token_id=0)
    # Decode only the newly generated tokens, skipping the prompt
    prompt_len = input_ids.shape[-1]
    output_decoded = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return "unsafe" if "unsafe" in output_decoded.lower() else "safe"

# Streamlit app
st.title("Text Safety Evaluator")

# Load the model and tokenizer once and keep them in session state
if "model_and_tokenizer" not in st.session_state:
    st.session_state.model_and_tokenizer = load_model()

# Example prompts: clicking a button prefills the text area below
example_prompts = load_example_dataset()
st.subheader("Example Inputs:")
for i, prompt in enumerate(example_prompts):
    if st.button(f"Example {i + 1}", key=f"example_{i}"):
        st.session_state.user_input = prompt

# User input
user_input = st.text_area(
    "Enter the text you want to evaluate:",
    height=100,
    value=st.session_state.get("user_input", ""),
)

# Placeholder so the result renders in a fixed spot on reruns
result_container = st.empty()

if st.button("Evaluate"):
    prediction = evaluate_text(user_input)
    if prediction:
        # st.empty() holds a single element, so wrap both lines in a container;
        # calling subheader then write directly on it would overwrite the first
        with result_container.container():
            st.subheader("Evaluation Result:")
            st.write(f"The text is evaluated as: **{prediction.upper()}**")
    else:
        result_container.warning("Please enter some text to evaluate.")

# Streamlit reruns the whole script on every interaction, so the logo and the
# info banner are rendered unconditionally; gating them behind a session-state
# flag would make them disappear after the first rerun.

# Logo at the bottom center
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    logo_url = "https://github.com/walledai/walledeval/assets/32847115/d8b1d14f-7071-448b-8997-2eeba4c2c8f6"
    logo = load_image_from_url(logo_url)
    # use_column_width overrides a fixed width, so pass only one of the two
    st.image(logo, use_column_width=True)

# Information about Walled Guard Advanced
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.info(
        "For a more performant version, check out Walled Guard Advanced. "
        "Connect with us at admin@walled.ai for more information."
    )
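
# ---------------------------------------------------------------------------
# Usage note (a sketch; the filename app.py is an assumption, not part of the
# original script):
#
#   pip install streamlit transformers datasets torch requests pillow
#   streamlit run app.py
#
# The first run downloads walledai/walledguard-c and walledai/XSTest from the
# Hugging Face Hub; @st.cache_resource / @st.cache_data reuse them afterwards.
# ---------------------------------------------------------------------------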