"""Dash application that renders the UGI (Uncensored General Intelligence) leaderboard.

The module loads the leaderboard CSV and the 12axes ideology descriptions at
import time, builds an AG Grid based layout, and registers the callbacks that
filter the grid and refresh the ideology description list.
"""
import ast
import base64
import json
import os
import re
from datetime import datetime, timedelta

import dash
import dash_ag_grid as dag
import numpy as np
import pandas as pd
from dash import html, dcc, Input, Output, State

# Define the columns
MAIN_COLS = ['#P', 'Model', 'UGI 🏆', 'W/10 👍', 'NatInt 💡', 'Coding 💻', 'Political Lean 📋', 'Ideology Name']
AXES_COLS_1 = ['govt', 'dipl', 'econ', 'scty']
AXES_COLS_2 = ['Federal-Unitary', 'Democratic-Autocratic', 'Security-Freedom', 'Nationalism-Internationalism',
               'Militarist-Pacifist', 'Assimilationist-Multiculturalist', 'Collectivize-Privatize',
               'Planned-LaissezFaire', 'Isolationism-Globalism', 'Irreligious-Religious',
               'Progressive-Traditional', 'Acceleration-Bioconservative']

# Filter option lists shared by the column definitions below.
_NUMERIC_FILTER_OPTIONS = ('equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual',
                           'lessThan', 'lessThanOrEqual', 'inRange')
_TEXT_FILTER_OPTIONS = ('contains', 'notContains', 'startsWith', 'endsWith')


def load_leaderboard_data(csv_file_path):
    """Read the leaderboard CSV and prepare it for display in the grid.

    Args:
        csv_file_path: Path of the CSV file to load.

    Returns:
        A DataFrame with ISO formatted date strings, the helper columns
        ``Model_Link``, ``Model_Display``, ``is_new``, ``pinned`` and
        ``selected``, numeric columns rounded to 3 decimals, and missing
        numeric values stored as ``None``. An empty DataFrame is returned
        (and the error printed) if anything goes wrong while loading.
    """
    try:
        df = pd.read_csv(csv_file_path, na_values=['NA'])

        # Convert date columns to datetime and then to ISO format strings
        for col in ['Release Date', 'Test Date']:
            parsed = pd.to_datetime(df[col], format='%m/%d/%Y', errors='coerce')
            df[col] = parsed.dt.strftime('%Y-%m-%d')  # Store as YYYY-MM-DD

        # ISO date strings order lexicographically, so a plain string
        # comparison against the cutoff is enough.
        two_weeks_ago = (datetime.now() - timedelta(days=14)).strftime('%Y-%m-%d')

        # Store model name and link separately
        df['Model_Link'] = df['Model Link'].fillna('')
        df['Model_Display'] = df['author/model_name']

        # Flag models whose Test Date falls within the last two weeks
        df['is_new'] = [
            '🆕' if pd.notna(test_date) and test_date >= two_weeks_ago else ''
            for test_date in df['Test Date']
        ]

        # Add pinned and selected columns
        df['pinned'] = False
        df['selected'] = False

        # Round numeric columns
        numeric_columns = list(df.select_dtypes(include=[np.number]).columns)
        if numeric_columns:
            df[numeric_columns] = df[numeric_columns].round(3)

        # Political Lean is stored as a percentage string; strip the sign and
        # parse it. astype(str) keeps this working when the column is empty
        # or was already parsed as numeric.
        lean_col = 'Political Lean 📋'
        df[lean_col] = pd.to_numeric(df[lean_col].astype(str).str.rstrip('%'), errors='coerce')

        # Store missing numeric values as None for display. Converting the
        # NaNs directly avoids a sentinel value that could collide with data.
        for col in dict.fromkeys([*numeric_columns, lean_col]):
            missing = df[col].isna()
            if missing.any():
                df[col] = df[col].astype(object).where(~missing, None)

        return df
    except Exception as e:
        # Best-effort load: callers expect a DataFrame even on failure.
        print(f"Error loading CSV file: {e}")
        return pd.DataFrame()


def _parse_js_array(array_text):
    """Parse the text of a JavaScript array literal into Python objects.

    Strict JSON is tried first because it handles ``true``/``false``/``null``
    without touching string contents. Otherwise the boolean/null keywords are
    rewritten and the text is evaluated with ``ast.literal_eval``, which only
    accepts literals and therefore cannot execute code from the file.
    """
    try:
        return json.loads(array_text)
    except json.JSONDecodeError:
        pythonized = array_text
        for js_word, py_word in (('true', 'True'), ('false', 'False'), ('null', 'None')):
            pythonized = re.sub(rf'\b{js_word}\b', py_word, pythonized)
        return ast.literal_eval(pythonized)


def load_ideology_descriptions():
    """Load ideology descriptions from ``ideologies.js``.

    Returns:
        A dict mapping each ideology ``name`` to its ``desc``. An empty dict
        is returned if no array is found in the file or if reading/parsing
        fails (the error is printed).
    """
    try:
        with open('ideologies.js', 'r', encoding='utf-8') as file:
            content = file.read()

        # Extract the array content between brackets
        start_idx = content.find('[')
        end_idx = content.rfind(']') + 1
        if start_idx == -1 or end_idx == 0:
            return {}

        ideology_data = _parse_js_array(content[start_idx:end_idx])

        # Create a dictionary mapping ideology names to their descriptions
        return {item['name']: item['desc'] for item in ideology_data}
    except Exception as e:
        print(f"Error loading ideologies.js: {e}")
        return {}


# Load descriptions once at startup
IDEOLOGY_DESCRIPTIONS = load_ideology_descriptions()


def get_kofi_button_base64():
    """Return the Ko-fi button images as base64 strings.

    Returns:
        A dict with the keys ``'light'`` and ``'dark'``, each holding the
        base64 encoded PNG read from the ``Images`` directory next to this
        file.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    filenames = {
        'light': 'support_me_on_kofi_white.png',
        'dark': 'support_me_on_kofi_dark.png',
    }

    images = {}
    for theme, filename in filenames.items():
        with open(os.path.join(current_dir, f"Images/{filename}"), "rb") as image_file:
            images[theme] = base64.b64encode(image_file.read()).decode('utf-8')
    return images


# Initialize the Dash app
app = dash.Dash(__name__)
server = app.server

# Custom page template. Dash validates the index string when it is assigned,
# so the {%app_entry%}, {%config%} and {%scripts%} placeholders must be present.
# NOTE(review): this section was labelled "Custom CSS" but no style rules are
# present here; classes such as 'kofi-light'/'kofi-dark' presumably rely on
# them - confirm whether a <style> block needs to be restored.
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>UGI Leaderboard</title>
        {%favicon%}
        {%css%}
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''

# Load data
df = load_leaderboard_data("ugi-leaderboard-data.csv")


# Define helper functions
def create_numeric_column(field, width=125, sort=None, sortIndex=None, **kwargs):
    """Build an AG Grid column definition for a numeric field.

    Args:
        field: Name of the data field shown in the column.
        width: Column width in pixels.
        sort: Optional initial sort direction for the column.
        sortIndex: Optional position of the column in the initial sort order.
        **kwargs: Only ``filterParams`` is used; it is merged into the
            default filter parameters.

    Returns:
        The column definition dict.
    """
    column = {
        "field": field,
        "width": width,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "inRange",
            "filterOptions": list(_NUMERIC_FILTER_OPTIONS)
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        # Sorts on the __sortValue helper that the grid's onGridReady
        # listener writes to each row.
        "comparator": {
            "function": """
                function(valueA, valueB, nodeA, nodeB, isInverted) {
                    const a = nodeA.data.__sortValue;
                    const b = nodeB.data.__sortValue;
                    return a - b;
                }
            """
        }
    }

    # Update filterParams if provided in kwargs
    if 'filterParams' in kwargs:
        column['filterParams'].update(kwargs['filterParams'])

    if sort:
        column["sort"] = sort
    if sortIndex is not None:
        column["sortIndex"] = sortIndex
    return column


def create_text_column(field, width=120):
    """Build an AG Grid column definition for a text field.

    Args:
        field: Name of the data field shown in the column.
        width: Column width in pixels.

    Returns:
        The column definition dict.
    """
    return {
        "field": field,
        "width": width,
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": list(_TEXT_FILTER_OPTIONS)
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True
    }


# JavaScript shared by the two date columns: show YYYY-MM-DD values as
# MM/DD/YYYY and order empty values after non-empty ones.
_DATE_VALUE_FORMATTER_JS = """
    function(params) {
        if (!params.value) return '';
        const [year, month, day] = params.value.split('-');
        return `${month}/${day}/${year}`;
    }
"""
_DATE_COMPARATOR_JS = """
    function(valueA, valueB) {
        if (!valueA && !valueB) return 0;
        if (!valueA) return 1;
        if (!valueB) return -1;
        return valueA.localeCompare(valueB);
    }
"""


def _create_date_column(field, cell_class):
    """Build an AG Grid column definition for an ISO date string field."""
    return {
        "field": field,
        "width": 130,
        "filter": "agDateColumnFilter",
        "valueFormatter": {"function": _DATE_VALUE_FORMATTER_JS},
        "comparator": {"function": _DATE_COMPARATOR_JS},
        "cellClass": cell_class,
        "headerClass": "ag-left-aligned-header wrap-text",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "sortable": True
    }


# Define column configurations
columnDefs = [
    {
        "headerName": "📌",
        "field": "pinned",
        "width": 55,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "PinRenderer",
        "pinned": "left"
    },
    {
        "headerName": "",
        "field": "is_new",
        "width": 55,
        "filter": False,
        "suppressMenu": True,
        "pinned": "left"
    },
    {
        "field": "#P",
        "width": 115,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "inRange",
            "filterOptions": list(_NUMERIC_FILTER_OPTIONS)
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        "pinned": "left"
    },
    {
        "field": "Model_Display",
        "headerName": "Model",
        "cellRenderer": "ModelLink",
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": list(_TEXT_FILTER_OPTIONS)
        },
        "width": 380,
        "suppressMenu": False,
        "pinned": "left",
        "headerClass": "ag-left-aligned-header wrap-text",
        "wrapHeaderText": True,
        "autoHeaderHeight": True
    },
    create_numeric_column("UGI 🏆", width=120, sort="desc", sortIndex=0, filterParams={
        "defaultOption": "greaterThanOrEqual"
    }),
    create_numeric_column("W/10 👍", width=130, filterParams={
        "defaultOption": "greaterThanOrEqual",
        "filterOptions": list(_NUMERIC_FILTER_OPTIONS)
    }),
    {
        "field": "NatInt 💡",
        "headerName": "NatInt 💡",
        "width": 140,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "greaterThanOrEqual",
            "filterOptions": list(_NUMERIC_FILTER_OPTIONS)
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc']
    },
    create_numeric_column("Coding 💻", width=140, filterParams={
        "defaultOption": "greaterThanOrEqual"
    }),
    {
        "field": "Political Lean 📋",
        "width": 175,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "inRange",
            "filterOptions": list(_NUMERIC_FILTER_OPTIONS)
        },
        "valueFormatter": {
            "function": "d3.format('.1f')(params.value) + '%'"
        },
        "sortingOrder": ['desc', 'asc'],
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True
    },
    {
        "headerName": "Ideology",
        "field": "Ideology Name",
        "width": 160,
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": list(_TEXT_FILTER_OPTIONS)
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True
    }
]

# Add axes columns with different widths
_last_axis_index = len(AXES_COLS_1) - 1
for i, col in enumerate(AXES_COLS_1):
    col_def = create_numeric_column(col, width=105)  # Narrower
    if i == 0:  # First column (govt)
        col_def["cellClass"] = ["ag-left-aligned-cell", "border-left"]
    elif i == _last_axis_index:  # Last column (scty)
        col_def["cellClass"] = ["ag-left-aligned-cell", "border-right"]
    else:  # Middle columns
        col_def["cellClass"] = ["ag-left-aligned-cell"]
    columnDefs.append(col_def)

columnDefs.extend(create_numeric_column(col, width=175) for col in AXES_COLS_2)  # Wider

# Date Columns
columnDefs.extend([
    _create_date_column("Release Date", ["ag-left-aligned-cell", "border-left"]),
    _create_date_column("Test Date", "ag-left-aligned-cell"),
])

# Define the grid options
dashGridOptions = {
    "animateRows": True,
    "pagination": False,
    "enableCellTextSelection": True,
    "ensureDomOrder": True,
    "suppressRowClickSelection": True,
    "suppressCellFocus": True,
    "getRowId": "function(params) { return params.data.Model_Display; }",
    "pinnedTopRowData": [],
    "sortingOrder": ['desc', 'asc'],
    "suppressMaintainUnsortedOrder": True,
    "rowBuffer": 10,
    "maxBlocksInCache": 2,
    # The previous conditional tested a non-empty string literal, which is
    # always truthy, so this value was always the dark theme. The client's
    # colour scheme cannot be detected from server-side Python.
    "theme": "ag-theme-alpine-dark",
    # On every sort change, write a numeric __sortValue to each row so that
    # empty values end up last for the active sort direction.
    "onGridReady": """
        function(params) {
            params.api.addEventListener('sortChanged', function() {
                const sortModel = params.api.getSortModel();
                if (sortModel && sortModel.length > 0) {
                    const field = sortModel[0].colId;
                    const isAsc = sortModel[0].sort === 'asc';
                    params.api.forEachNode((node, index) => {
                        const value = node.data[field];
                        if (value === null || value === undefined || value === '' || isNaN(value)) {
                            node.setDataValue('__sortValue', isAsc ? Number.MAX_SAFE_INTEGER : -Number.MAX_SAFE_INTEGER);
                        } else {
                            node.setDataValue('__sortValue', Number(value));
                        }
                    });
                    params.api.onSortChanged();
                }
            });
        }
    """
}

# Read the Ko-fi button images once; both header links use this dict.
_KOFI_IMAGES = get_kofi_button_base64()


def _initial_ideology_markdown():
    """Return the markdown listing every ideology present in the loaded data.

    Returns an empty string when the data has no 'Ideology Name' column
    (for example when the CSV failed to load).
    """
    if 'Ideology Name' not in df.columns:
        return ""
    ideologies = sorted(set(df['Ideology Name'].dropna()))
    return "\n\n".join(
        f"**{ideology}**: {IDEOLOGY_DESCRIPTIONS.get(ideology, 'No description available.')}"
        for ideology in ideologies
        if ideology  # Skip empty values
    )


# Define the layout
app.layout = html.Div([
    # Header
    html.Div([
        html.Div([
            html.A("Contact/Model Requests", href="mailto:ugi.leaderboard@gmail.com", className="model-link"),
            html.Span(" (or create a HF discussion)")
        ], style={'float': 'left'}),
        html.Div([
            html.A(
                html.Img(
                    src=f"data:image/png;base64,{_KOFI_IMAGES['light']}",
                    style={'width': '165px'},
                    className='kofi-light'
                ),
                href="https://ko-fi.com/dontplantoend",
                target="_blank"
            ),
            html.A(
                html.Img(
                    src=f"data:image/png;base64,{_KOFI_IMAGES['dark']}",
                    style={'width': '165px'},
                    className='kofi-dark'
                ),
                href="https://ko-fi.com/dontplantoend",
                target="_blank"
            )
        ], style={'float': 'right'})
    ], style={'overflow': 'hidden', 'marginBottom': '20px', 'padding': '0 20px'}),

    # Title
    html.Div([
        html.H1("📢 UGI Leaderboard", className="page-title", style={'fontSize': '38px'}),
        html.H2("Uncensored General Intelligence", className="page-subtitle"),
    ], style={'marginBottom': '30px'}),

    html.Div([
        html.Div("To filter columns, click the ≡ next to a column's name. On mobile, hold the column name for the menu to appear.",
                 className='filter-description', style={'marginBottom': '20px'}),
    ], style={'padding': '0 20px'}),

    # Model Type Filter
    html.Div([
        html.Div([
            html.Label("Display Models:", className="model-type-filter"),
            dcc.Checklist(
                id='model-type-filter',
                options=[
                    {'label': 'Finetune', 'value': 'Is Finetuned'},
                    {'label': 'Merge', 'value': 'Is Merged'},
                    {'label': 'Foundation', 'value': 'Is Foundation'},
                    {'label': 'Proprietary', 'value': 'proprietary'}
                ],
                value=['Is Finetuned', 'Is Merged', 'Is Foundation', 'proprietary'],  # All checked by default
                inline=True,
                style={'display': 'inline-block'}
            )
        ], style={'float': 'left'}),
        html.Div([
            dcc.Checklist(
                id='na-model-filter',
                options=[{'label': 'NA Models', 'value': 'show_na'}],
                value=[],  # Unchecked by default
                inline=True,
                style={'display': 'inline-block'}
            )
        ], style={'float': 'right'})
    ], style={'marginBottom': '20px', 'padding': '0 20px', 'overflow': 'hidden'}),

    # Grid
    html.Div([
        dag.AgGrid(
            id='leaderboard-grid',
            columnDefs=columnDefs,
            rowData=df.to_dict('records'),
            defaultColDef={
                "sortable": True,
                "resizable": True,
                "filter": "agNumberColumnFilter",
                "floatingFilter": False,
                "sortingOrder": ['desc', 'asc'],
                "filterParams": {
                    "defaultOption": "between"
                },
                "comparator": {
                    "function": """
                        function(valueA, valueB, nodeA, nodeB, isInverted) {
                            const isEmptyA = valueA === null || valueA === undefined || valueA === '' || isNaN(valueA);
                            const isEmptyB = valueB === null || valueB === undefined || valueB === '' || isNaN(valueB);

                            // Force empty values to bottom
                            if (isEmptyA && !isEmptyB) return 1;
                            if (!isEmptyA && isEmptyB) return -1;
                            if (isEmptyA && isEmptyB) return 0;

                            // Normal comparison for non-empty values
                            if (typeof valueA === 'number' && typeof valueB === 'number') {
                                return valueA - valueB;
                            }
                            return String(valueA).localeCompare(String(valueB));
                        }
                    """
                }
            },
            dashGridOptions=dashGridOptions,
            dangerously_allow_code=True,
            className="ag-theme-alpine",
            style={"height": "600px", "width": "100%"}
        )
    ], style={'marginBottom': '30px'}),

    # Description
    html.Div([
        html.H3("About"),

        html.P([html.Strong("UGI:"), " Uncensored General Intelligence. A measurement of the amount of uncensored/controversial information an LLM knows and is willing to tell the user. The leaderboard is made of roughly 100 questions/tasks, measuring both willingness to answer and accuracy in fact-based controversial questions. The leaderboard's questions are kept private in order to avoid the common problem of not knowing if a model is good or if it was just trained on the test questions."]),

        html.P([html.Strong("W/10:"), " Willingness/10. A more narrow subset of the UGI questions, solely focused on measuring how far a model can be pushed before going against its instructions or refusing to answer."]),

        html.P([html.Strong("NatInt:"), " Natural Intelligence. A common knowledge quiz covering real-world topics like pop culture trivia. Measures if the model understands a diverse range of topics, as opposed to mainly focusing on textbook information and the types of questions commonly tested on benchmarks."]),

        html.P([html.Strong("Coding:"), " A simple 50 question quiz measuring how vast a model's programming knowledge is. Each question is worth 2 points."]),

        html.P([
            html.Strong("Political Lean:"),
            " Measures a model's tendency to hold left wing vs right wing political beliefs. Ranges between -100% and 100%, where left wing is left of zero (negative) and right wing is right of zero (positive). Uses the axes of the ",
            html.A("12axes",
                   href="https://politicaltests.github.io/12axes/",
                   target="_blank",
                   style={'color': 'var(--link-color)'}
                   ),
            " test most aligned with modern left vs right issues:"
        ], style={'marginBottom': '4px'}),

        html.Ul([
            html.Li("Nationalism-Internationalism, Assimilationist-Multiculturalist, Collectivize-Privatize, Planned-LaissezFaire, Isolationism-Globalism, Irreligious-Religious, Progressive-Traditional, and Acceleration-Bioconservative."),
            html.Li("Blank if model didn't answer a sufficient number of questions.")
        ], style={'marginTop': '0px', 'marginBottom': '16px'}),

        html.P("Aggregate Political Scores", style={'marginBottom': '4px'}),
        html.Ul([
            html.Li("Govt: Higher = State authority, Lower = Individual liberty"),
            html.Li("Dipl: Higher = Global outlook, Lower = National interests"),
            html.Li("Econ: Higher = Economic equality, Lower = Market freedom"),
            html.Li("Scty: Higher = Progressive values, Lower = Traditional values")
        ], style={'marginTop': '0px', 'marginBottom': '16px'}),

        html.P([
            "For the 12 political axes, the percentage shown is how much the model aligns with the trait on the ",
            html.U("left side of the hyphen"),
            "."
        ]),
        html.Br(),

        html.P("A high UGI but low W/10 could mean for example that the model can provide a lot of accurate sensitive information, but will refuse to form the information into something it sees as offensive or against its rules."),
        html.P("All local models are tested using Q6_K.gguf quants.")
    ], style={
        'maxWidth': '1200px',
        'margin': '0 auto',
        'padding': '0 20px',
        'color': 'var(--text-color)'
    }),

    # 12axes Ideology Descriptions
    html.Details([
        html.Summary("12axes Ideology Descriptions", className="details-summary"),
        html.Div([
            html.I("Only showing ideologies at least one model has.",
                   className='ideology-note',
                   style={'fontSize': '0.9em'}),
            # Container refreshed by update_ideology_descriptions; the id
            # must match that callback's Output.
            html.Div(
                dcc.Markdown(_initial_ideology_markdown(), className='markdown-content'),
                id='ideology-descriptions'
            ),
            html.Div([
                html.A("Source",
                       href="https://github.com/politicaltests/politicaltests.github.io/blob/main/12axes/ideologies.js",
                       target="_blank",
                       className="source-link")
            ], style={'marginTop': '20px'})
        ], style={'paddingTop': '10px'})
    ], style={'marginTop': '30px', 'marginBottom': '50px', 'maxWidth': '1200px', 'margin': '30px auto 80px'})
], style={'maxWidth': '100%', 'margin': '0 auto'})


def debug_callback(value):
    """Print the model filter value and return it unchanged."""
    print("Model filter value:", value)
    return value


@app.callback(
    [Output('leaderboard-grid', 'rowData'),
     Output('model-type-filter', 'value')],
    [Input('model-type-filter', 'value'),
     Input('na-model-filter', 'value')],
    prevent_initial_call=False
)
def filter_models(selected_types, show_na):
    """Filter the grid rows by model type and by missing political data.

    Args:
        selected_types: Checked values of the model type checklist.
        show_na: Checked values of the "NA Models" checklist; rows with a
            missing political value are hidden unless it is non-empty.

    Returns:
        A tuple of the grid row records (sorted by UGI, descending) and the
        model type selection to write back to the checklist.
    """
    updated_types = list(selected_types) if selected_types is not None else []

    # Nothing selected, or the CSV failed to load and df has no columns.
    if not updated_types or df.empty:
        return [], updated_types

    mask = pd.Series(False, index=df.index)

    # Model type filtering
    if 'Is Finetuned' in updated_types:
        if 'Is Merged' in updated_types:
            # Show both regular finetuned and merged models
            mask |= df['Is Finetuned']
        else:
            # Show only regular finetuned models (not merged)
            mask |= (df['Is Finetuned'] & ~df['Is Merged'])
    elif 'Is Merged' in updated_types:
        # Show only merged models
        mask |= df['Is Merged']

    # Rows without a 'Total Parameters' value are treated as proprietary.
    no_param_count = df['Total Parameters'].isna()
    if 'Is Foundation' in updated_types:
        mask |= (df['Is Foundation'] & ~no_param_count)
    if 'proprietary' in updated_types:
        mask |= no_param_count

    filtered_df = df[mask]

    # NA filtering
    political_columns = ['Political Lean 📋', *AXES_COLS_1, *AXES_COLS_2]
    if not show_na:  # Hide NA models by default
        has_na = filtered_df[political_columns].isna().any(axis=1)
        filtered_df = filtered_df[~has_na]

    filtered_df = filtered_df.sort_values('UGI 🏆', ascending=False)
    return filtered_df.to_dict('records'), updated_types


@app.callback(
    Output('ideology-descriptions', 'children'),
    [Input('leaderboard-grid', 'rowData')]
)
def update_ideology_descriptions(row_data):
    """Rebuild the ideology description list for the rows in the grid.

    Args:
        row_data: The grid's current row records.

    Returns:
        An empty list when there are no rows, otherwise a Markdown component
        listing each ideology found in the rows that has a known description.
    """
    if not row_data:
        return []

    # Get unique ideologies from current grid data
    unique_ideologies = sorted({row['Ideology Name'] for row in row_data if row.get('Ideology Name')})

    # Uses the descriptions loaded at startup instead of re-reading the file
    # on every grid update.
    markdown_content = [
        f"**{ideology}**: {IDEOLOGY_DESCRIPTIONS[ideology]}"
        for ideology in unique_ideologies
        if ideology in IDEOLOGY_DESCRIPTIONS
    ]

    return dcc.Markdown("\n\n".join(markdown_content), className='markdown-content')


# Move pinned rows to the top when a cell renderer reports an event.
# Registered before the server starts so it is active when running as a
# script. rowData is also written by filter_models, hence allow_duplicate
# (which in turn requires prevent_initial_call=True).
app.clientside_callback(
    """
    function(n_clicks, current_data) {
        if (!n_clicks) return current_data;

        const pinnedRows = current_data.filter(row => row.pinned);
        const unpinnedRows = current_data.filter(row => !row.pinned);

        return [...pinnedRows, ...unpinnedRows];
    }
    """,
    Output('leaderboard-grid', 'rowData', allow_duplicate=True),
    Input('leaderboard-grid', 'cellRendererData'),
    State('leaderboard-grid', 'rowData'),
    prevent_initial_call=True
)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8050)