# app.py - CyberSecEval leaderboard (Streamlit app)
import streamlit as st
import pandas as pd
import json
import plotly.express as px
import plotly.graph_objects as go
# Configure the streamlit page
st.set_page_config(layout="wide", page_title="CyberSecEval Leaderboard", page_icon=":bar_chart:")
# Display the title and brief description of the page
st.markdown("# CyberSecEval: Comprehensive Evaluation Framework for Cybersecurity Risks and Capabilities of Large Language Models (LLMs)", unsafe_allow_html=True)
# Provide more detailed information about the page and its purpose
st.markdown("""
Welcome to the CyberSecEval leaderboard. This platform showcases the results of our cybersecurity evaluation framework applied to various popular LLMs. Our open-source evaluation suite's workings and coverage are detailed in our [first](https://ai.meta.com/research/publications/purple-llama-cyberseceval-a-benchmark-for-evaluating-the-cybersecurity-risks-of-large-language-models/) and [second](https://ai.meta.com/research/publications/cyberseceval-2-a-wide-ranging-cybersecurity-evaluation-suite-for-large-language-models/) papers.
In the following sections, we present case study test results and provide concise explanations of their implications.
**Take Action:** Measure your own LLM using CyberSecEval! Visit our open-source GitHub repository [here](https://github.com/meta-llama/PurpleLlama/tree/main/CybersecurityBenchmarks)!
""")
# Load the JSON files containing test results, closing each file handle after reading
def load_json(path):
    with open(path) as f:
        return json.load(f)

data_attack = load_json("attack_helpfulness.json")
data_interpreter = load_json("interpreter_abuse_tests.json")
data_prompt = load_json("prompt_injection_tests.json")
data_exploit = load_json("exploit_tests.json")
data_tradeoff = load_json("trr_frr_tradeoff_helpfulness.json")
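# These files are expected to map model names to per-test scores; see the CyberSecEval repo for the exact schemas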
# Sort rows and columns by their sums, largest first
def sort_df(df):
    row_order = df.sum(axis=1).sort_values(ascending=False).index
    col_order = df.sum(axis=0).sort_values(ascending=False).index
    return df.loc[row_order, col_order]
# Convert JSON data to pandas DataFrames and sort
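# Assumed orientation: one axis holds models, the other test categories; values are rates in [0, 1]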
attack_df = sort_df(pd.DataFrame(data_attack))
interpreter_df = sort_df(pd.DataFrame(data_interpreter))
prompt_df = sort_df(pd.DataFrame(data_prompt))
exploit_df = sort_df(pd.DataFrame(data_exploit))
# Scatterplot data: per-model false refusal rate vs. violation rate
tradeoff_df = pd.DataFrame(data_tradeoff).transpose()
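# After the transpose, each row of tradeoff_df is one model, with 'Violation Rate' and 'Refusal Rate' columns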
# Briefly explain the purpose of the scatterplot
st.markdown("### Evaluating LLMs' Assistance to Cyberattackers")
st.markdown("""
We measure the tradeoff between LLMs' 'false refusals' (refusing to assist in legitimate cybersecurity-related activities) and their 'violation rate' (agreeing to assist in offensive cyber attacks). The results are displayed in the scatterplot below.
""")
# Create a new figure
fig = go.Figure()
# Plot each model as its own trace so it gets a distinct color and legend entry
for model_name, row in tradeoff_df.iterrows():
    fig.add_trace(go.Scatter(x=[row['Violation Rate']], y=[row['Refusal Rate']],
                             mode='markers',
                             name=model_name, marker=dict(size=15)))
# Add a title and axis labels consistent with the false-refusal framing above
fig.update_layout(title='False Refusal Rate vs. Violation Rate for Cybersecurity Prompts',
                  xaxis_title='Violation Rate',
                  yaxis_title='False Refusal Rate',
plot_bgcolor='rgba(0,0,0,0)',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False))
# Describe the plot
st.markdown("""
The plot below shows each LLM's performance on two tasks:
* Complying with legitimate requests for help with cybersecurity-related tasks
* Refusing to assist in offensive cyberattacks
An ideal LLM would sit in the lower-left corner of the plot: a low violation rate and a low false refusal rate.
""")
# Display the plot
st.plotly_chart(fig, use_container_width=True)
# Render a results table as a heatmap-styled DataFrame, optionally followed by a summary bar chart
def display_results(df, title, description, include_barchart=True):
    if title:
        st.markdown(f"### {title}")
    st.markdown(description)
    st.dataframe(df.style.format("{:.2%}").background_gradient(cmap='coolwarm'))
    if include_barchart:
        if title:
            st.markdown(f"### Overall Performance of Models in {title}")
        # Sum each model's scores across categories for an at-a-glance ranking
        fig = px.bar(df.sum(axis=1).sort_values(ascending=False))
        fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
                          xaxis=dict(showgrid=False),
                          yaxis=dict(showgrid=False))
        st.plotly_chart(fig, use_container_width=True)
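# Note: the calls below pass include_barchart=False, so only the heatmap tables are rendered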
# Display the heatmap tables for each test
display_results(interpreter_df, 'LLMs Assistance to Hack Code Interpreter Environments',
                'The table below shows how often each model complies with attempts to hack a connected code interpreter. Lower values indicate safer models.', include_barchart=False)
display_results(prompt_df, 'LLMs Susceptibility to Prompt Injection Attacks',
                'The table below shows how often each model succumbs to prompt injection attacks, broken out by the threat categories detailed in our paper. Lower values indicate safer models.', include_barchart=False)
display_results(exploit_df, 'LLMs Capability to Solve Cyber Capture-the-Flag Challenges',
                'The table below shows how well each model performs at solving capture-the-flag style program exploitation challenges. Higher values indicate more capable models.', include_barchart=False)
# Load additional JSON files
data_mitre = load_json("mitre.json")
data_insecure = load_json("insecure_code.json")
# Compute each model's insecure-code pass rates (pass rate = 1 - vulnerable rate)
# Note: 'autocomplete_vunerable_percentage' preserves the key's spelling in the JSON data
model_stats_insecure = {}
for model, categories in data_insecure.items():
    pass_rates = [1 - v['autocomplete_vunerable_percentage'] for v in categories.values()]
    model_stats_insecure[model] = {'Mean Insecure Code Test Pass Rate': pd.Series(pass_rates).mean()}
    for category, values in categories.items():
        model_stats_insecure[model][f'Insecure Code Test Pass Rate in {category}'] = 1 - values['autocomplete_vunerable_percentage']
leaderboard_df_insecure = pd.DataFrame.from_dict(model_stats_insecure, orient='index').sort_values(
    by='Mean Insecure Code Test Pass Rate', ascending=False)
# Prepare long-format data for the clustered bar chart (one row per model/category pair)
chart_data_insecure = []
for model, categories in data_insecure.items():
    for category, values in categories.items():
        chart_data_insecure.append({
            'Model': model,
            'Category': category,
            'Insecure Code Test Pass Rate': 1 - values['autocomplete_vunerable_percentage']
        })
chart_df_insecure = pd.DataFrame(chart_data_insecure)
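# px.bar needs this long format to group bars by category and color them by model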
# Heading, description, and styled leaderboard table
st.markdown("### LLMs Adherence to Secure Coding Practices in Risky Software Engineering Settings")
st.markdown("""
The table below shows the propensity of LLMs to avoid insecure coding practices when used as coding assistants or software engineering agents. Higher values indicate safer models.
""")
st.dataframe(leaderboard_df_insecure.style.format("{:.2%}").background_gradient(cmap='Blues'))
# Create a Plotly chart
fig_insecure = px.bar(chart_df_insecure, x='Category', y='Insecure Code Test Pass Rate', barmode='group', color='Model',
title='Category-wise Insecure Code Test Pass Rate per Model',
labels={'Insecure Code Test Pass Rate': 'Insecure Code Test Pass Rate %'},
color_discrete_sequence=px.colors.qualitative.Pastel)
fig_insecure.update_layout(plot_bgcolor='rgba(0,0,0,0)',
xaxis=dict(showgrid=False),
yaxis=dict(showgrid=False, tickformat=".0%"),
legend=dict(title='Models'))
# Display the chart
st.plotly_chart(fig_insecure, use_container_width=True)