# Page-capture residue (Hugging Face "Spaces" status banner: Running); not part of the program.
import dash | |
from dash import html, dcc, Input, Output, State | |
import dash_ag_grid as dag | |
import pandas as pd | |
import numpy as np | |
from datetime import datetime, timedelta | |
import base64 | |
import os | |
def load_leaderboard_data(csv_file_path):
    """Load the leaderboard CSV and prepare it for display in the grid.

    The literal string ``NA`` is read as a missing value. Helper columns are
    added (``model_type_sort``, ``Model_Link``, ``Model_Display``, ``pinned``,
    ``selected``), numeric columns are rounded to three decimals, and the rows
    are ordered by ``Score``, then ``8k 🪡``, then ``16k 🪡`` (all descending).

    Args:
        csv_file_path: Path of the CSV file to read.

    Returns:
        The prepared ``DataFrame``. If the file cannot be read or an expected
        column is missing, the error is printed and an empty ``DataFrame`` is
        returned instead.
    """

    def is_base(value):
        # NaN is truthy in Python, so a missing ``Base`` entry has to be
        # rejected explicitly or it would be classified like a truthy one.
        return bool(pd.notna(value) and value)

    try:
        df = pd.read_csv(csv_file_path, na_values=['NA'])

        # Type sort key: 0 when ``Base`` is set (truthy), 4 otherwise.
        # A plain list (rather than a row-wise ``apply``) also works when the
        # frame has no rows.
        df['model_type_sort'] = [0 if is_base(v) else 4 for v in df['Base']]

        # Keep the link and the display name in separate columns.
        df['Model_Link'] = df['Model Link'].fillna('')
        df['Model_Display'] = df['author/model_name']

        # UI state columns, initially unset for every row.
        df['pinned'] = False
        df['selected'] = False

        # Round every numeric column; missing values stay NaN.
        for col in df.select_dtypes(include=[np.number]).columns:
            df[col] = df[col].round(3)

        # Score -> 8k -> 16k, highest first (missing values sort last).
        return df.sort_values(
            by=['Score', '8k 🪡', '16k 🪡'],
            ascending=[False, False, False],
        )
    except Exception as e:
        # Best effort: the caller gets an empty frame instead of a crash.
        print(f"Error loading CSV file: {e}")
        return pd.DataFrame()
# Initialize the Dash app. Font Awesome is loaded as an external stylesheet;
# the grid's menu icon markup in ``dashGridOptions`` uses its ``fas fa-search``
# classes.
app = dash.Dash(
    __name__,
    external_stylesheets=[
        "https://use.fontawesome.com/releases/v5.15.4/css/all.css",
    ],
)
# Module-level handle to the app's server object (e.g. for a WSGI host).
server = app.server
# Custom page template. Dash substitutes the {%...%} placeholders; the inline
# <style> block defines the light/dark colour variables (switched with the
# ``prefers-color-scheme`` media query) and the AG Grid style overrides.
app.index_string = '''
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>Fey's Multi-Needle & Behavior Leaderboard</title>
        {%favicon%}
        {%css%}
        <style>
            :root {
                --bg-color: #ffffff;
                --text-color: #000000;
                --grid-bg: #ffffff;
                --grid-border: #ddd;
                --link-color: #007bff;
                --secondary-text: #666;
                --pinned-bg: #f5f5f5;
                --border-color: #ccc;
            }
            @media (prefers-color-scheme: dark) {
                :root {
                    --bg-color: #0d1117;
                    --text-color: #e6e6e6;
                    --grid-bg: #161b22;
                    --grid-border: #30363d;
                    --link-color: #58a6ff;
                    --secondary-text: #8b949e;
                    --pinned-bg: #1c2128;
                    --border-color: #30363d;
                    color-scheme: dark;
                }
                .ag-theme-alpine .ag-menu {
                    background-color: #161b22 !important;
                    color: #e6e6e6 !important;
                    border-color: #30363d !important;
                }
                .ag-theme-alpine .ag-filter-condition {
                    background-color: #161b22 !important;
                    border-color: #30363d !important;
                }
                .ag-theme-alpine .ag-mini-filter input,
                .ag-theme-alpine .ag-filter input {
                    background-color: #0d1117 !important;
                    color: #e6e6e6 !important;
                    border-color: #30363d !important;
                }
                .ag-theme-alpine .ag-select .ag-picker-field-wrapper {
                    background-color: #0d1117 !important;
                    color: #e6e6e6 !important;
                    border-color: #30363d !important;
                }
                .ag-theme-alpine .ag-picker-field-wrapper {
                    border-color: #30363d !important;
                }
                .ag-theme-alpine .ag-select-list {
                    background-color: #161b22 !important;
                    color: #e6e6e6 !important;
                }
                .ag-theme-alpine .ag-select-list-item:hover {
                    background-color: #1c2128 !important;
                }
                .ag-theme-alpine input[type="date"] {
                    color-scheme: dark;
                    background-color: #161b22;
                    color: #e6e6e6;
                    border-color: #30363d;
                }
                .ag-theme-alpine input[type="date"]::-webkit-calendar-picker-indicator {
                    background-color: #161b22;
                    cursor: pointer;
                    filter: invert(0.8);
                }
            }
            body {
                font-family: 'Segoe UI', Arial, sans-serif;
                margin: 0;
                padding: 20px;
                background-color: var(--bg-color);
                color: var(--text-color);
            }
            /* Header and Title Styles */
            .page-title {
                text-align: center;
                margin: 0;
                font-size: 38px;
                color: var(--text-color) !important;
            }
            .page-subtitle {
                text-align: center;
                margin: 0;
                font-size: 20px;
                font-weight: 600;
                color: var(--text-color) !important;
            }
            /* Filter Styles */
            .model-type-filter {
                color: var(--text-color) !important;
                margin-right: 10px;
                font-weight: bold;
            }
            #model-type-filter label,
            #na-model-filter label {
                color: var(--text-color) !important;
                margin-right: 10px;
                font-weight: bold;
            }
            /* Grid Styles */
            .ag-theme-alpine {
                --ag-font-family: 'Segoe UI', Arial, sans-serif;
                --ag-font-size: 14px;
                --ag-background-color: var(--grid-bg);
                --ag-border-color: var(--grid-border);
                --ag-header-background-color: var(--grid-bg);
                --ag-odd-row-background-color: var(--grid-bg);
                --ag-header-foreground-color: var(--text-color);
                --ag-foreground-color: var(--text-color);
                --ag-row-border-color: var(--grid-border);
            }
            .ag-theme-alpine .ag-pinned-left-header,
            .ag-theme-alpine .ag-cell-last-left-pinned {
                border-right: 2px solid var(--grid-border) !important;
                margin-right: -1px !important;
            }
            /* Mobile specific fixes */
            .ag-theme-alpine.ag-grid-mobile .ag-pinned-left-header,
            .ag-theme-alpine.ag-grid-mobile .ag-cell-last-left-pinned {
                border-right: 2px solid var(--grid-border) !important;
            }
            /* Ensure pinned columns maintain their position */
            .ag-theme-alpine .ag-pinned-left-cols-container,
            .ag-theme-alpine .ag-pinned-left-header {
                position: sticky;
                left: 0;
                z-index: 1;
            }
            .ag-floating-top {
                border-bottom: 3px solid var(--border-color) !important;
            }
            .ag-floating-top:empty {
                border-bottom: none !important;
            }
            .pinned-row {
                background-color: var(--pinned-bg) !important;
                font-weight: 500;
            }
            /* Text Alignment Classes */
            .ag-left-aligned-header {
                text-align: left !important;
            }
            .ag-left-aligned-cell {
                text-align: left !important;
            }
            .ag-header-cell-text {
                white-space: normal !important;
                line-height: 1.2em;
                overflow: visible;
                padding-bottom: 4px;
            }
            .ag-header-cell {
                height: auto !important;
                min-height: 48px;
            }
            .wrap-text {
                white-space: normal !important;
                line-height: 1.2em;
            }
            .no-break {
                white-space: nowrap !important;
            }
            /* Border Classes */
            .border-left {
                border-left: 2px solid var(--grid-border) !important;
                margin-left: -2px !important;
            }
            .border-right {
                border-right: 2px solid var(--grid-border) !important;
            }
            /* Link Styles */
            .model-link {
                color: var(--link-color) !important;
                text-decoration: none;
            }
            .model-link:visited {
                color: var(--link-color) !important;
            }
            .model-link:active {
                color: var(--link-color) !important;
            }
            .model-link:focus {
                color: var(--link-color) !important;
            }
            .ag-theme-alpine a,
            .ag-theme-alpine a:link,
            .ag-theme-alpine a:visited,
            .ag-theme-alpine a:hover,
            .ag-theme-alpine a:active,
            .ag-theme-alpine a:focus {
                color: var(--link-color) !important;
                text-decoration: none !important;
            }
            .ag-theme-alpine a:hover {
                text-decoration: underline !important;
            }
            .source-link {
                color: var(--link-color) !important;
                text-decoration: none;
            }
            /* Details/Summary Styles */
            .details-summary {
                cursor: pointer;
                font-weight: bold;
                font-size: 1.2em;
                margin-top: 20px;
                color: var(--text-color) !important;
            }
            .ideology-note {
                color: var(--secondary-text) !important;
                font-size: 0.9em;
            }
            /* Markdown Content */
            .markdown-content {
                color: var(--text-color) !important;
            }
            .markdown-content a {
                color: var(--link-color) !important;
            }
            /* Ko-fi Button Visibility */
            .kofi-light {
                display: none;
            }
            .kofi-dark {
                display: none;
            }
            @media (prefers-color-scheme: light) {
                .kofi-light {
                    display: block;
                }
            }
            @media (prefers-color-scheme: dark) {
                .kofi-dark {
                    display: block;
                }
                /* Dark Theme Specific Overrides */
                .ag-theme-alpine {
                    --ag-background-color: #161b22 !important;
                    --ag-header-background-color: #161b22 !important;
                    --ag-odd-row-background-color: #161b22 !important;
                    --ag-row-background-color: #161b22 !important;
                    --ag-header-foreground-color: #e6e6e6 !important;
                    --ag-foreground-color: #e6e6e6 !important;
                    --ag-row-border-color: #30363d !important;
                    --ag-border-color: #30363d !important;
                    --ag-secondary-border-color: #30363d !important;
                    --ag-alpine-active-color: #58a6ff !important;
                    --ag-selected-row-background-color: #1c2128 !important;
                    --ag-row-hover-color: #1c2128 !important;
                }
                .ag-header-cell-filtered {
                    background-color: rgba(88, 166, 255, 0.1) !important;
                }
                input[type="checkbox"] {
                    accent-color: var(--link-color);
                }
                /* Ensure text colors in dark mode */
                .page-title,
                .page-subtitle,
                .model-type-filter label,
                #model-type-filter label,
                #na-model-filter label {
                    color: #e6e6e6 !important;
                }
                .filter-description,
                .ideology-note {
                    color: #8b949e !important;
                }
            }
            a:visited {
                color: var(--link-color) !important;
            }
            .markdown-content a:visited {
                color: var(--link-color) !important;
            }
            .split-header-container {
                display: flex;
                flex-direction: column;
                line-height: 1.2em;
            }
            .split-header-top, .split-header-bottom {
                white-space: nowrap;
            }
            .ag-theme-alpine .new-emoji-cell.ag-cell {
                font-size: 18px !important;
                display: flex !important;
                align-items: center !important;
                justify-content: flex-start !important;
                padding-left: 12px !important;
            }
            .ag-ltr .ag-cell {
                border-right-width: 1px;
                justify-content: center;
                display: flex;
            }
            .ag-header-cell-label {
                overflow: hidden;
                text-overflow: ellipsis;
                justify-content: center;
            }
            div.ag-cell.ag-cell-not-inline-editing.ag-cell-normal-height.ag-cell-last-left-pinned {
                justify-content: right;
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
'''
# Load the leaderboard once at import time. The path is relative to the
# current working directory; if the CSV cannot be read the loader prints the
# error and returns an empty DataFrame.
df = load_leaderboard_data("fmnb-leaderboard-data.csv")
# Define helper functions | |
def create_numeric_column(field, width=125, sort=None, sortIndex=None, **kwargs):
    """Build an AG Grid column definition for a numeric field.

    Args:
        field: Name of the row-data field shown in the column.
        width: Column width in pixels.
        sort: Optional initial sort direction; only set when truthy.
        sortIndex: Optional position in the sort order; only set when not
            ``None``.
        **kwargs: Only ``filterParams`` is read; its entries are merged over
            the default filter parameters. Any other keyword is ignored.

    Returns:
        A new ``dict`` describing the column.
    """
    column = {
        "field": field,
        "width": width,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "inRange",
            "filterOptions": [
                'equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual',
                'lessThan', 'lessThanOrEqual', 'inRange',
            ],
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        # NOTE(review): the comparator reads ``__sortValue`` from the row
        # data; nothing in the visible part of this file sets that key, so
        # confirm it is populated elsewhere (otherwise a - b is NaN).
        "comparator": {
            "function": """
                function(valueA, valueB, nodeA, nodeB, isInverted) {
                    const a = nodeA.data.__sortValue;
                    const b = nodeB.data.__sortValue;
                    return a - b;
                }
            """
        },
    }

    # Caller-supplied filter parameters override the defaults key by key.
    if 'filterParams' in kwargs:
        column['filterParams'].update(kwargs['filterParams'])

    if sort:
        column["sort"] = sort
    if sortIndex is not None:
        column["sortIndex"] = sortIndex

    return column
def create_text_column(field, width=120):
    """Build an AG Grid column definition for a text field.

    Args:
        field: Name of the row-data field shown in the column.
        width: Column width in pixels.

    Returns:
        A new ``dict`` describing a left-aligned column with a text filter
        that defaults to ``contains``.
    """
    return {
        "field": field,
        "width": width,
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith'],
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
    }
# Column definitions for the leaderboard grid, in display order. The pin and
# model columns are pinned to the left. Renderer names ("PinRenderer",
# "ModelLink", "ScoreRenderer", "BehaviorRenderer", "TypeRenderer") refer to
# components that are not defined in this file.
columnDefs = [
    {
        "headerName": "📌",
        "field": "pinned",
        "width": 55,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "PinRenderer",
        "sortable": False,
        "pinned": "left",
    },
    {
        "field": "Model_Display",
        "headerName": "Model",
        "cellRenderer": "ModelLink",
        "filter": "agTextColumnFilter",
        "filterParams": {
            "defaultOption": "contains",
            "filterOptions": ['contains', 'notContains', 'startsWith', 'endsWith'],
        },
        "width": 420,
        "suppressMenu": False,
        "pinned": "left",
        "headerClass": "ag-left-aligned-header wrap-text",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
    },
    {
        "field": "Score",
        # Bold score cells. AG Grid takes per-cell CSS through ``cellStyle``;
        # a bare "FontWeight" key is not a column property.
        "cellStyle": {"fontWeight": 700},
        "width": 110,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "greaterThanOrEqual",
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
        "cellRenderer": "ScoreRenderer",
    },
    {
        "headerName": "Behavior",
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "field": "Behavior",
        "width": 120,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "BehaviorRenderer",
        "sortable": True,
        "sortingOrder": ['asc', 'desc'],
    },
    {
        "field": "8k 🪡",
        "headerName": "8k 🪡",
        "width": 100,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "greaterThanOrEqual",
            "filterOptions": [
                'equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual',
                'lessThan', 'lessThanOrEqual', 'inRange',
            ],
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": ["ag-left-aligned-cell", "border-left"],
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
    },
    create_numeric_column(
        "16k 🪡",
        width=100,
        filterParams={"defaultOption": "greaterThanOrEqual"},
    ),
    # Misc Columns
    {
        "field": "Size",
        "width": 100,
        "filter": "agNumberColumnFilter",
        "filterParams": {
            "defaultOption": "equals",
            "filterOptions": [
                'equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual',
                'lessThan', 'lessThanOrEqual', 'inRange',
            ],
        },
        "headerClass": "ag-left-aligned-header wrap-text",
        "cellClass": "ag-left-aligned-cell",
        "wrapHeaderText": True,
        "autoHeaderHeight": True,
        "suppressSizeToFit": True,
        "sortingOrder": ['desc', 'asc'],
    },
    # Hidden column bound to the same field as the visible "Type" column below.
    {
        "field": "model_type_sort",
        "hide": True,
    },
    {
        "headerName": "Type",
        "field": "model_type_sort",  # sort field directly
        "width": 90,
        "filter": False,
        "suppressMenu": True,
        "cellRenderer": "TypeRenderer",
        "sortable": True,
        "sortingOrder": ['asc', 'desc'],
    },
    {
        "headerName": "Settings",
        "field": "Settings",
        "width": 120,
        "filter": False,
        "suppressMenu": True,
        "cellClass": "ag-left-aligned-cell",
    },
    {
        "headerName": "New",
        "field": "New",
        "width": 70,
        "filter": False,
        "suppressMenu": True,
        "cellClass": "ag-left-aligned-cell",
    },
]
# Grid-level options passed to the AgGrid component via ``dashGridOptions``.
dashGridOptions = {
    "animateRows": True,
    "pagination": False,
    "enableCellTextSelection": True,
    "ensureDomOrder": True,
    "suppressRowClickSelection": True,
    "suppressCellFocus": True,
    "getRowId": "params => params.data.Model_Display",
    "pinnedTopRowData": [],
    "suppressMaintainUnsortedOrder": True,
    "suppressMultiSort": True,
    "rowBuffer": 10,
    "maxBlocksInCache": 2,
    "icons": {
        "menu": '<i class="fas fa-search" style="color: var(--text-color)"></i>'
    },
    # A CSS media query cannot be evaluated in Python: the former conditional
    # tested a non-empty string literal, so it always produced this value.
    # NOTE(review): light/dark switching is done in the page CSS; confirm
    # whether this option is still needed at all.
    "theme": "ag-theme-alpine-dark",
    "columnState": {
        "function": """
            function() {
                return {
                    columnVisibility: {}
                };
            }
        """
    },
}
# Page layout: a store for pinned models, the title block, the leaderboard
# grid, and the info / recommended-settings section.
app.layout = html.Div([
    dcc.Store(id='pinned-models-store', data=[]),

    # Title
    html.Div([
        html.H1("🪡 Fey's MNB Leaderboard 🪡",
                className="page-title",
                style={'fontSize': '38px'}),
        html.H2("Multi-Needle & Behavior Evaluation",
                className="page-subtitle"),
    ], style={'marginBottom': '30px'}),

    # Notice
    # html.Div(
    #     html.P(
    #         "None",
    #         style={'textAlign': 'center', 'color': 'red', 'fontSize': '0.9em'}
    #     )
    # ),

    # Grid
    html.Div([
        dag.AgGrid(
            id='leaderboard-grid',
            columnDefs=columnDefs,
            rowData=df.to_dict('records'),
            defaultColDef={
                "sortable": True,
                "resizable": True,
                "filter": "agNumberColumnFilter",
                "floatingFilter": False,
                "sortingOrder": ['desc', 'asc'],
                "filterParams": {
                    # "inRange" is the filter option key for a range filter
                    # (the same key create_numeric_column uses).
                    "defaultOption": "inRange"
                },
                # Default comparator: empty values always sort to the bottom,
                # numbers compare numerically, everything else as strings.
                "comparator": {
                    "function": """
                        function(valueA, valueB, nodeA, nodeB, isInverted) {
                            const isEmptyA = valueA === null || valueA === undefined || valueA === '' || isNaN(valueA);
                            const isEmptyB = valueB === null || valueB === undefined || valueB === '' || isNaN(valueB);
                            // Force empty values to bottom
                            if (isEmptyA && !isEmptyB) return 1;
                            if (!isEmptyA && isEmptyB) return -1;
                            if (isEmptyA && isEmptyB) return 0;
                            // Normal comparison for non-empty values
                            if (typeof valueA === 'number' && typeof valueB === 'number') {
                                return valueA - valueB;
                            }
                            return String(valueA).localeCompare(String(valueB));
                        }
                    """
                }
            },
            dashGridOptions=dashGridOptions,
            dangerously_allow_code=True,
            className="ag-theme-alpine",
            style={"height": "600px", "width": "100%"},
            enableEnterpriseModules=False,
            getRowId="params.data.Model_Display"
        )
    ], style={'marginBottom': '10px'}),

    # Description
    html.Div([
        html.H3("Info", style={'fontSize': '22px', 'marginBottom': '0px'}),
        html.P([html.Strong(""), "This latest reiteration of the leaderboard has finally made it to Hugging Face with extended functionality based on the UGI leaderboard, enjoy!"],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Score:"), " Primarily based on the scoring in the multi-needle test at 8k / 16k context, weighted towards 16k."],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Behavior:"), " Qualitative assessment of the model's behavior during the evaluation. User discretion is advised, as it only has a minor impact on the final score."],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Difficulty:"), " The current difficulty is only set at a medium level. (Silver Tier 1)"],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.P([html.Strong("Version:"), " 4.0"],
               style={'marginTop': '7px', 'marginBottom': '4px'}),
        html.Br(),
        html.Details([
            html.Summary("Recommended Settings",
                         style={
                             'fontWeight': 'bold',
                             'fontSize': '1em',
                             'marginLeft': '0px',
                             'cursor': 'pointer'
                         }),
            html.Ul([
                html.Br(),
                html.Li([
                    "1: Recommended to manually set a RoPE Frequency of 2650000 with Nemo based models when using >8k context.",
                    html.Br(),
                    "LLama.cpp: --rope-freq-base 2650000 (RoPE Base in KoboldCpp)",
                    html.Br(),
                    "EXL2: rope_alpha 2.65",
                ]),
                html.Br(),
                html.Li("2: Recommended to set </s> as an additional stopping token when using these models with ChatML."),
            ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
        ], style={'marginBottom': '16px'}),
    ], style={
        'maxWidth': '1200px',
        'margin': '0 auto',
        'padding': '0 20px',
        'color': 'var(--text-color)'
    }),
], style={'maxWidth': '100%', 'margin': '0 auto'})
# Register the callback before the server is started: ``app.run_server``
# blocks, so a registration placed after it is never reached when this file
# runs as a script.
#
# The JavaScript reads the grid's pinned top rows, rebuilds the row list
# without the rows whose ``Model_Display`` is pinned, pushes that list to the
# grid through ``window.gridApi``, and returns ``no_update`` so Dash itself
# leaves ``rowData`` untouched.
# NOTE(review): no component with id 'model-type-filter' is defined in the
# layout visible in this file — confirm the Input target exists.
app.clientside_callback(
    """
    function(n_clicks) {
        if (!window.gridApi) return;
        const pinnedRows = window.gridApi.getGridOption('pinnedTopRowData') || [];
        if (pinnedRows.length > 0) {
            const pinnedIds = new Set(pinnedRows.map(row => row.Model_Display));
            const currentRows = [];
            window.gridApi.forEachNode(node => {
                if (!pinnedIds.has(node.data.Model_Display)) {
                    currentRows.push(node.data);
                }
            });
            window.gridApi.setGridOption('rowData', currentRows);
        }
        return window.dash_clientside.no_update;
    }
    """,
    Output('leaderboard-grid', 'rowData'),
    Input('model-type-filter', 'value')
)

# NOTE(review): newer Dash releases prefer ``app.run(...)`` over
# ``app.run_server(...)`` — confirm against the pinned Dash version.
if __name__ == '__main__':
    app.run_server(host='0.0.0.0', port=8050)