Spaces:

DontPlanToEnd
/

UGI-Leaderboard

Running

File size: 5,601 Bytes

import gradio as gr
import pandas as pd

# Define the columns for the UGI Leaderboard
# Columns shown in the leaderboard table, in display order.
UGI_COLS = [
    '#P',
    'Model',
    'UGI 🏆',
    'Willingness 👍',
    'Unruly',
    'Internet',
    'CrimeStats',
    'Stories/Jokes',
    'PolContro',
]

# Load the leaderboard data from a CSV file
def load_leaderboard_data(csv_file_path):
    """Read the leaderboard CSV and turn model names into HTML links.

    Args:
        csv_file_path: Anything ``pd.read_csv`` accepts (path or file-like).

    Returns:
        The parsed DataFrame with each 'Model' cell wrapped in an ``<a>`` tag
        pointing at that row's 'Link' (rows without a link keep the plain
        name) and with the 'Link' column removed. If anything goes wrong the
        error is printed and an empty DataFrame with the UGI_COLS columns is
        returned instead.
    """

    def _model_cell(row):
        # Rows with no link keep the bare model name.
        if pd.isna(row["Link"]):
            return row["Model"]
        return f'<a href="{row["Link"]}" target="_blank" style="color: blue; text-decoration: none;">{row["Model"]}</a>'

    try:
        table = pd.read_csv(csv_file_path)
        table['Model'] = table.apply(_model_cell, axis=1)
        # 'Link' has been folded into 'Model' and is not displayed on its own.
        table.drop(columns=['Link'], inplace=True)
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        # Fall back to an empty table that still has the expected columns.
        return pd.DataFrame(columns=UGI_COLS)
    return table

# Update the leaderboard table based on the search query and parameter range filters

# Size-bucket label -> (inclusive lower bound, exclusive upper bound) applied
# to the 'Params' column. None marks an open-ended side.
_PARAM_BUCKETS = {
    '~1.5': (None, 2.5),
    '~3': (2.5, 6),
    '~7': (6, 9.5),
    '~13': (9.5, 16),
    '~20': (16, 28),
    '~34': (28, 40),
    '~50': (40, 60),
    '~70+': (60, None),
}


def update_table(df: pd.DataFrame, query: str, param_ranges: dict) -> pd.DataFrame:
    """Filter the leaderboard by size bucket and free-text search.

    Args:
        df: Leaderboard data. Must contain a 'Params' column (only read when
            at least one bucket is checked) and every column in UGI_COLS.
        query: Case-insensitive substring to look for in the text rendering
            of each row (all columns of ``df``, not only the displayed ones).
            An empty value disables the search.
        param_ranges: Mapping of bucket label (see ``_PARAM_BUCKETS``) to a
            truthy/falsy "checked" flag.

    Returns:
        The matching rows restricted to the UGI_COLS columns.

    Size filtering rules:
        * If no bucket is checked, no size filter is applied.
        * Otherwise a row is kept when its 'Params' value falls in any
          checked bucket. Rows with a missing 'Params' value are kept only
          when every entry in ``param_ranges`` is checked.
        * Checked labels that are not known buckets match nothing; if they
          are the only checked entries the result is empty (previously this
          raised ``ValueError`` from ``pd.concat``).
    """
    filtered_df = df
    if any(param_ranges.values()):
        params = filtered_df['Params']
        conditions = []
        for label, checked in param_ranges.items():
            if not checked or label not in _PARAM_BUCKETS:
                continue
            lower, upper = _PARAM_BUCKETS[label]
            if lower is None:
                conditions.append(params < upper)
            elif upper is None:
                conditions.append(params >= lower)
            else:
                conditions.append((params >= lower) & (params < upper))

        if all(param_ranges.values()):
            conditions.append(params.isna())

        if conditions:
            filtered_df = filtered_df[pd.concat(conditions, axis=1).any(axis=1)]
        else:
            # Something was checked but nothing recognisable: no row can match.
            filtered_df = filtered_df.iloc[0:0]

    # Skip the search on an empty frame: apply(axis=1) would not return a
    # boolean mask there, and indexing with its result would drop every
    # column and break the UGI_COLS selection below.
    if query and not filtered_df.empty:
        needle = query.lower()
        hits = filtered_df.apply(lambda row: needle in row.to_string().lower(), axis=1)
        filtered_df = filtered_df[hits]

    return filtered_df[UGI_COLS]  # Return only the columns defined in UGI_COLS

# Define the Gradio interface

# Size-bucket labels offered by the checkbox filter, in display order.
SIZE_CHOICES = ['~1.5', '~3', '~7', '~13', '~20', '~34', '~50', '~70+']

demo = gr.Blocks()

with demo:
    gr.Markdown("## UGI Leaderboard", elem_classes="text-lg text-center")
    gr.Markdown("""
    UGI: Uncensored General Intelligence. The average of 5 different subjects that LLMs are commonly steered away from. The leaderboard is made from roughly 60 questions overall, measuring both "willingness to answer" and "accuracy" in fact-based controversial questions.
    
    Willingness: A more narrow score, solely measuring the LLM's willingness to answer controversial questions.
    
    Unruly: Knowledge of activities that are generally frowned upon.
    
    Internet: Knowledge of various internet information, from professional to deviant.
    
    CrimeStats: Knowledge of crime statistics which are uncomfortable to talk about.
   
    Stories/Jokes: Ability to write offensive stories and jokes.
    
    PolContro: Knowledge of politically/socially controversial information.
    """)
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(placeholder=" 🔍 Search for a model...", show_label=False, elem_id="search-bar")
        with gr.Row():
            filter_columns_size = gr.CheckboxGroup(
                label="Model sizes (in billions of parameters)",
                choices=SIZE_CHOICES,
                value=list(SIZE_CHOICES),  # every bucket starts checked
                interactive=True,
                elem_id="filter-columns-size",
            )

    # Load the initial leaderboard data
    leaderboard_df = load_leaderboard_data("ugi-leaderboard-data.csv")

    # Define the datatypes for each column, setting 'Model' column to 'html'
    datatypes = ['html' if col == 'Model' else 'str' for col in UGI_COLS]

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df[UGI_COLS],
        datatype=datatypes,  # Specify the datatype for each column
        interactive=False,  # Set to False to make the leaderboard non-editable
        visible=True,
        elem_classes="text-sm"  # CSS class applied to the leaderboard table
    )

    # Define the search and filter functionality
    inputs = [
        search_bar,
        filter_columns_size
    ]

    outputs = leaderboard_table

    def refresh_table(query, selected_sizes):
        """Recompute the table from the current search text and checked sizes.

        The checkbox group passes the list of *selected* labels, so each
        label's checked state is derived by membership. Pairing the labels
        positionally with that list (as before) marked the wrong buckets as
        checked whenever some box was unticked.
        """
        selected = selected_sizes or []
        checked = {label: label in selected for label in SIZE_CHOICES}
        return update_table(leaderboard_df, query, checked)

    # Both controls feed the same callback with the same inputs.
    search_bar.change(
        fn=refresh_table,
        inputs=inputs,
        outputs=outputs
    )

    filter_columns_size.change(
        fn=refresh_table,
        inputs=inputs,
        outputs=outputs
    )

# Launch the Gradio app
demo.launch()