wandra13 commited on
Commit
06b3f52
·
1 Parent(s): 1efe882

Update app.py with new model entries and logos; enhance CSS for improved UI. Added new assets including HumaniBench logo and vector favicon. Refactored styles for better layout and responsiveness.

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
36
+ src/assets/teaser_figure_humanibench.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -14,55 +14,52 @@ GITHUB_URL = "https://github.com/VectorInstitute/humaniBench"
14
  DATASET_URL = "https://huggingface.co/datasets/vector-institute/HumaniBench"
15
  WEBSITE_URL = "https://vectorinstitute.github.io/humanibench/"
16
 
17
- black_logo_path = "src/assets/logo-icon-black.png"
18
- white_logo_path = "src/assets/logo-icon-white.png"
19
 
20
  # ========================
21
- # MODEL REGISTRY
22
- # Ordered by overall performance (update once paper results are confirmed)
23
  # ========================
24
 
25
  MODELS = [
26
- {"model": "GPT-4o", "link": "https://openai.com/gpt-4", "org": "OpenAI", "params": "-", "type": "Closed"},
27
- {"model": "Gemini-2.0-Flash", "link": "https://deepmind.google/technologies/gemini/", "org": "Google", "params": "-", "type": "Closed"},
28
- {"model": "InternVL2-26B", "link": "https://huggingface.co/OpenGVLab/InternVL2-26B", "org": "OpenGVLab", "params": "26B", "type": "Open"},
29
- {"model": "InternVL-Chat-v1.5", "link": "https://huggingface.co/OpenGVLab/InternVL-Chat-V1-5", "org": "OpenGVLab", "params": "26B", "type": "Open"},
30
- {"model": "LLaVA-NeXT-34B", "link": "https://huggingface.co/lmms-lab/llava-next-34b", "org": "lmms-lab", "params": "34B", "type": "Open"},
31
- {"model": "LLaVA-NeXT-13B", "link": "https://huggingface.co/lmms-lab/llava-next-vicuna-13b", "org": "lmms-lab", "params": "13B", "type": "Open"},
32
- {"model": "LLaVA-1.5-13B", "link": "https://huggingface.co/llava-hf/llava-1.5-13b-hf", "org": "llava-hf", "params": "13B", "type": "Open"},
33
- {"model": "Qwen-VL-Chat", "link": "https://huggingface.co/Qwen/Qwen-VL-Chat", "org": "Alibaba", "params": "7B", "type": "Open"},
34
- {"model": "CogVLM2-19B", "link": "https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B", "org": "THUDM", "params": "19B", "type": "Open"},
35
- {"model": "IDEFICS2-8B", "link": "https://huggingface.co/HuggingFaceM4/idefics2-8b", "org": "HuggingFace", "params": "8B", "type": "Open"},
36
- {"model": "InstructBLIP-13B", "link": "https://huggingface.co/Salesforce/instructblip-vicuna-13b", "org": "Salesforce", "params": "13B", "type": "Open"},
37
- {"model": "Phi-3.5-Vision", "link": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct", "org": "Microsoft", "params": "4B", "type": "Open"},
38
- {"model": "MiniCPM-V-2.6", "link": "https://huggingface.co/openbmb/MiniCPM-V-2_6", "org": "OpenBMB", "params": "8B", "type": "Open"},
39
- {"model": "BLIP-2-FlanT5-XXL", "link": "https://huggingface.co/Salesforce/blip2-flan-t5-xxl", "org": "Salesforce", "params": "11B", "type": "Open"},
40
- {"model": "mPLUG-Owl2", "link": "https://huggingface.co/MAGAer13/mplug-owl2-llama2-7b", "org": "Alibaba DAMO", "params": "7B", "type": "Open"},
41
  ]
42
 
43
  # ========================
44
  # PRINCIPLE DATA (Table A2)
45
- # 7 Human-Centric (HC) principles, scores per model
46
- # Replace None with actual values from the paper
47
  # ========================
48
 
49
- # Columns: Fairness | Ethics | Understanding | Reasoning | Language | Empathy | Robustness
50
  PRINCIPLE_DATA = [
51
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
52
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
53
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
54
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
55
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
56
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
57
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
58
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
59
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
60
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
61
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
62
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
63
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
64
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
65
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Fairness": None, "Ethics": None, "Understanding": None, "Reasoning": None, "Language": None, "Empathy": None, "Robustness": None, "Overall": None},
66
  ]
67
 
68
  # ========================
@@ -70,139 +67,158 @@ PRINCIPLE_DATA = [
70
  T1–T7 per-model accuracy / scores
71
  # ========================
72
 
73
- # T1: Scene Understanding — Accuracy (%)
 
 
 
 
 
 
 
 
 
74
  T1_DATA = [
75
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
76
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
77
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
78
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
79
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
80
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
81
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
82
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
83
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
84
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
85
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
86
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
87
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
88
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
89
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
90
  ]
91
 
92
- # T2: Instance Identity — Accuracy (%)
 
 
93
  T2_DATA = [
94
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
95
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
96
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
97
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
98
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
99
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
100
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
101
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
102
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
103
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
104
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
105
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
106
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
107
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
108
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
109
  ]
110
 
111
- # T3: Multiple-Choice VQA — Accuracy (%)
 
 
112
  T3_DATA = [
113
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
114
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
115
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
116
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
117
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
118
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
119
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
120
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
121
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
122
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
123
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
124
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
125
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
126
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
127
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Age": None, "Gender": None, "Race": None, "Occupation": None, "Sports": None, "Overall": None},
128
  ]
129
 
130
- # T4: Multilingual VQA — Accuracy (%) per language
131
- LANGUAGES = ["English", "Spanish", "French", "German", "Chinese", "Japanese", "Arabic", "Hindi", "Portuguese", "Italian", "Korean"]
132
 
 
133
  T4_DATA = [
134
- {"model": "GPT-4o", "link": MODELS[0]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
135
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
136
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
137
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
138
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
139
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
140
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
141
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
142
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
143
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
144
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
145
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
146
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
147
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
148
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "English": None, "Spanish": None, "French": None, "German": None, "Chinese": None, "Japanese": None, "Arabic": None, "Hindi": None, "Portuguese": None, "Italian": None, "Korean": None, "Avg": None},
149
  ]
150
 
151
- # T5: Visual Grounding — Accuracy (%)
 
 
152
  T5_DATA = [
153
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Overall": None},
154
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Overall": None},
155
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Overall": None},
156
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Overall": None},
157
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Overall": None},
158
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Overall": None},
159
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Overall": None},
160
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Overall": None},
161
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Overall": None},
162
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Overall": None},
163
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Overall": None},
164
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Overall": None},
165
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Overall": None},
166
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Overall": None},
167
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Overall": None},
168
  ]
169
 
170
- # T6: Empathetic Captioning — quality score
 
 
171
  T6_DATA = [
172
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Score": None},
173
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Score": None},
174
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Score": None},
175
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Score": None},
176
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Score": None},
177
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Score": None},
178
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Score": None},
179
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Score": None},
180
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Score": None},
181
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Score": None},
182
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Score": None},
183
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Score": None},
184
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Score": None},
185
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Score": None},
186
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Score": None},
187
  ]
188
 
189
- # T7: Image Resilience — Accuracy (%)
 
 
190
  T7_DATA = [
191
- {"model": "GPT-4o", "link": MODELS[0]["link"], "Overall": None},
192
- {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Overall": None},
193
- {"model": "InternVL2-26B", "link": MODELS[2]["link"], "Overall": None},
194
- {"model": "InternVL-Chat-v1.5", "link": MODELS[3]["link"], "Overall": None},
195
- {"model": "LLaVA-NeXT-34B", "link": MODELS[4]["link"], "Overall": None},
196
- {"model": "LLaVA-NeXT-13B", "link": MODELS[5]["link"], "Overall": None},
197
- {"model": "LLaVA-1.5-13B", "link": MODELS[6]["link"], "Overall": None},
198
- {"model": "Qwen-VL-Chat", "link": MODELS[7]["link"], "Overall": None},
199
- {"model": "CogVLM2-19B", "link": MODELS[8]["link"], "Overall": None},
200
- {"model": "IDEFICS2-8B", "link": MODELS[9]["link"], "Overall": None},
201
- {"model": "InstructBLIP-13B", "link": MODELS[10]["link"], "Overall": None},
202
- {"model": "Phi-3.5-Vision", "link": MODELS[11]["link"], "Overall": None},
203
- {"model": "MiniCPM-V-2.6", "link": MODELS[12]["link"], "Overall": None},
204
- {"model": "BLIP-2-FlanT5-XXL", "link": MODELS[13]["link"], "Overall": None},
205
- {"model": "mPLUG-Owl2", "link": MODELS[14]["link"], "Overall": None},
206
  ]
207
 
208
 
@@ -240,7 +256,7 @@ INTRODUCTION_HTML = f"""
240
  <div class="stat-label">Image–Question Pairs</div>
241
  </div>
242
  <div class="stat-box">
243
- <div class="stat-value">1,500</div>
244
  <div class="stat-label">Unique Images</div>
245
  </div>
246
  <div class="stat-box">
@@ -268,7 +284,7 @@ grounded in seven human-centric (HC) principles.
268
  ### Dataset Overview
269
 
270
  - **32,000+ expert-verified** image–question pairs from real-world news imagery
271
- - **1,500 unique images** spanning diverse social contexts
272
  - **7 evaluation tasks** (T1–T7) covering scene understanding, identity, reasoning, language, grounding, empathy, and robustness
273
  - **7 HC principles**: Fairness, Ethics, Understanding, Reasoning, Language, Empathy, Robustness
274
  - **5 social attributes**: Age, Gender, Race, Occupation, Sports
@@ -315,6 +331,7 @@ This dataset is released under **CC BY-NC-SA 4.0**.
315
 
316
  ### Contact
317
 
 
318
  - **Website:** [{WEBSITE_URL}]({WEBSITE_URL})
319
  - **Dataset:** [HuggingFace]({DATASET_URL})
320
  - **Code:** [GitHub]({GITHUB_URL})
@@ -329,7 +346,7 @@ This dataset is released under **CC BY-NC-SA 4.0**.
329
  # TABLE BUILDERS
330
  # ========================
331
 
332
- def _make_df(data: list[dict], score_cols: list[str], pct: bool = True) -> pd.DataFrame:
333
  rows = []
334
  for item in data:
335
  row = {"Model": make_clickable_model(item["model"], item.get("link"))}
@@ -364,7 +381,7 @@ def build_overall_leaderboard():
364
  )
365
 
366
 
367
- def build_task_leaderboard(task_data: list[dict], score_cols: list[str], pct: bool = True):
368
  df = _make_df(task_data, score_cols, pct=pct)
369
  return gr.Dataframe(
370
  value=df,
@@ -375,9 +392,9 @@ def build_task_leaderboard(task_data: list[dict], score_cols: list[str], pct: bo
375
  )
376
 
377
 
378
- def build_social_leaderboard(task_data: list[dict]):
379
- ATTR_COLS = ["Age", "Gender", "Race", "Occupation", "Sports", "Overall"]
380
- return build_task_leaderboard(task_data, ATTR_COLS, pct=True)
381
 
382
 
383
  def build_multilingual_leaderboard():
@@ -396,20 +413,33 @@ with demo:
396
  <div id="page-header">
397
  <div id="header-container">
398
  <div id="left-container">
399
- <img id="black-logo" src="/gradio_api/file={black_logo_path}" onerror="this.style.display='none'">
400
- <img id="white-logo" src="/gradio_api/file={white_logo_path}" onerror="this.style.display='none'">
 
 
401
  </div>
402
  <div id="centre-container">
403
  <h1>HumaniBench Leaderboard</h1>
404
  <p>A Human-Centric Evaluation Framework for Large Multimodal Models</p>
405
  </div>
406
- <div id="right-container"></div>
 
 
 
407
  </div>
408
  </div>
409
  """)
410
 
411
  gr.HTML(INTRODUCTION_HTML)
412
 
 
 
 
 
 
 
 
 
413
  with gr.Tabs():
414
 
415
  # ── Tab 1: Overall Rankings ──────────────────────────────────────
@@ -422,7 +452,7 @@ with demo:
422
  </div>
423
  """, elem_classes="markdown-text")
424
  build_overall_leaderboard()
425
- gr.Markdown("*Scores are averaged across tasks associated with each principle. -- indicates data not yet available.*")
426
 
427
  # ── Tab 2: Task Results ──────────────────────────────────────────
428
  with gr.Tab("Task Results"):
@@ -435,80 +465,34 @@ with demo:
435
 
436
  with gr.Tabs():
437
  with gr.Tab("T1 · Scene Understanding"):
438
- gr.Markdown("**Metric:** Accuracy (%) | Breakdown by social attribute (Age / Gender / Race / Occupation / Sports)")
439
- build_social_leaderboard(T1_DATA)
440
 
441
  with gr.Tab("T2 · Instance Identity"):
442
- gr.Markdown("**Metric:** Accuracy (%) | Breakdown by social attribute")
443
- build_social_leaderboard(T2_DATA)
444
 
445
  with gr.Tab("T3 · MC-VQA"):
446
- gr.Markdown("**Metric:** Accuracy (%) | Breakdown by social attribute")
447
- build_social_leaderboard(T3_DATA)
448
 
449
  with gr.Tab("T4 · Multilingual"):
450
- gr.Markdown("**Metric:** Accuracy (%) across 11 languages — see the Multilingual tab for the full view.")
451
  build_multilingual_leaderboard()
452
 
453
  with gr.Tab("T5 · Visual Grounding"):
454
- gr.Markdown("**Metric:** Accuracy (%)")
455
- build_task_leaderboard(T5_DATA, ["Overall"], pct=True)
456
 
457
  with gr.Tab("T6 · Empathetic Captioning"):
458
- gr.Markdown("**Metric:** Empathy score")
459
- build_task_leaderboard(T6_DATA, ["Score"], pct=False)
460
 
461
  with gr.Tab("T7 · Image Resilience"):
462
- gr.Markdown("**Metric:** Accuracy (%)")
463
- build_task_leaderboard(T7_DATA, ["Overall"], pct=True)
464
-
465
- # ── Tab 3: Social Attributes ─────────────────────────────────────
466
- with gr.Tab("Social Attributes"):
467
- gr.Markdown("""
468
- <div class="warning-box">
469
- <h3>Fairness Analysis: Accuracy by Social Attribute</h3>
470
- Performance breakdowns across Age, Gender, Race, Occupation, and Sports
471
- for Tasks T1, T2, and T3. Disparities reveal systematic biases.
472
- </div>
473
- """, elem_classes="markdown-text")
474
-
475
- with gr.Tabs():
476
- with gr.Tab("T1 · Scene Understanding"):
477
- gr.Markdown("**Metric:** Accuracy (%) per social attribute group")
478
- build_social_leaderboard(T1_DATA)
479
-
480
- with gr.Tab("T2 · Instance Identity"):
481
- gr.Markdown("**Metric:** Accuracy (%) per social attribute group")
482
- build_social_leaderboard(T2_DATA)
483
-
484
- with gr.Tab("T3 · MC-VQA"):
485
- gr.Markdown("**Metric:** Accuracy (%) per social attribute group")
486
- build_social_leaderboard(T3_DATA)
487
-
488
- gr.Markdown("""
489
- <div class="warning-box">
490
- ⚠️ Performance disparities across demographic groups should be addressed before deploying models
491
- in high-stakes settings.
492
- </div>
493
- """, elem_classes="markdown-text")
494
-
495
- # ── Tab 4: Multilingual ──────────────────────────────────────────
496
- with gr.Tab("Multilingual (T4)"):
497
- gr.Markdown("""
498
- <div class="info-box">
499
- <h3>Task T4: Multilingual VQA — Accuracy (%) per Language</h3>
500
- Models are evaluated on visual questions posed in 11 languages. Avg is the
501
- macro-average across all languages.
502
- </div>
503
- """, elem_classes="markdown-text")
504
- build_multilingual_leaderboard()
505
- gr.Markdown("""
506
- **Languages:** English · Spanish · French · German · Chinese · Japanese · Arabic · Hindi · Portuguese · Italian · Korean
507
-
508
- *Gaps between high-resource (English, French) and low-resource (Arabic, Hindi) languages expose multilingual inclusivity limitations.*
509
- """, elem_classes="markdown-text")
510
 
511
- # ── Tab 5: About ────────────────────────────────────────────────
512
  with gr.Tab("About"):
513
  gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
514
 
@@ -526,6 +510,4 @@ with demo:
526
 
527
 
528
  if __name__ == "__main__":
529
- import os
530
- assets = [p for p in [black_logo_path, white_logo_path] if os.path.exists(p)]
531
- demo.launch(allowed_paths=assets if assets else None)
 
14
  DATASET_URL = "https://huggingface.co/datasets/vector-institute/HumaniBench"
15
  WEBSITE_URL = "https://vectorinstitute.github.io/humanibench/"
16
 
17
+ vector_logo_path = "src/assets/vector-favicon-48x48.svg"
18
+ humanibench_logo_path = "src/assets/HumaniBenchLogo.ico"
19
 
20
  # ========================
21
+ # MODEL REGISTRY (Table A2 order)
 
22
  # ========================
23
 
24
  MODELS = [
25
+ {"model": "GPT-4o", "link": "https://openai.com/gpt-4o", "org": "OpenAI", "params": "-", "type": "Closed"},
26
+ {"model": "Gemini-2.0-Flash", "link": "https://deepmind.google/technologies/gemini/", "org": "Google", "params": "-", "type": "Closed"},
27
+ {"model": "Qwen-2.5-7B", "link": "https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct", "org": "Alibaba", "params": "7B", "type": "Open"},
28
+ {"model": "LLaVA-v1.6", "link": "https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf", "org": "LLaVA", "params": "7B", "type": "Open"},
29
+ {"model": "Phi-4", "link": "https://huggingface.co/microsoft/Phi-4-multimodal-instruct", "org": "Microsoft", "params": "5.6B", "type": "Open"},
30
+ {"model": "Gemma-3", "link": "https://huggingface.co/google/gemma-3-4b-it", "org": "Google", "params": "4B", "type": "Open"},
31
+ {"model": "CogVLM2-19B", "link": "https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B", "org": "THUDM", "params": "19B", "type": "Open"},
32
+ {"model": "Phi-3.5", "link": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct", "org": "Microsoft", "params": "4B", "type": "Open"},
33
+ {"model": "Molmo-7V", "link": "https://huggingface.co/allenai/Molmo-7B-O-0924", "org": "Allen AI", "params": "7B", "type": "Open"},
34
+ {"model": "Aya-Vision-8B", "link": "https://huggingface.co/CohereForAI/aya-vision-8b", "org": "Cohere", "params": "8B", "type": "Open"},
35
+ {"model": "InternVL2.5", "link": "https://huggingface.co/OpenGVLab/InternVL2_5-8B", "org": "OpenGVLab", "params": "8B", "type": "Open"},
36
+ {"model": "Janus-Pro-7B", "link": "https://huggingface.co/deepseek-ai/Janus-Pro-7B", "org": "DeepSeek", "params": "7B", "type": "Open"},
37
+ {"model": "GLM-4V-9B", "link": "https://huggingface.co/THUDM/glm-4v-9b", "org": "THUDM", "params": "9B", "type": "Open"},
38
+ {"model": "Llama-3.2-11B", "link": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct", "org": "Meta", "params": "11B", "type": "Open"},
39
+ {"model": "DeepSeek-VL2-Small", "link": "https://huggingface.co/deepseek-ai/deepseek-vl2-small", "org": "DeepSeek", "params": "3B", "type": "Open"},
40
  ]
41
 
42
  # ========================
43
  # PRINCIPLE DATA (Table A2)
44
+ # Scores are percentages; Overall = mean of all 7 principles
 
45
  # ========================
46
 
 
47
  PRINCIPLE_DATA = [
48
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Fairness": 61.1, "Ethics": 99.0, "Understanding": 74.8, "Reasoning": 79.2, "Language": 62.5, "Empathy": 90.5, "Robustness": 50.90, "Overall": 74.00},
49
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Fairness": 61.0, "Ethics": 98.9, "Understanding": 73.5, "Reasoning": 78.8, "Language": 62.2, "Empathy": 89.5, "Robustness": 57.20, "Overall": 74.44},
50
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Fairness": 63.1, "Ethics": 96.5, "Understanding": 84.9, "Reasoning": 67.1, "Language": 57.4, "Empathy": 73.8, "Robustness": 53.60, "Overall": 70.91},
51
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Fairness": 59.7, "Ethics": 94.4, "Understanding": 80.3, "Reasoning": 68.1, "Language": 55.4, "Empathy": 66.3, "Robustness": 60.60, "Overall": 69.26},
52
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Fairness": 59.2, "Ethics": 98.2, "Understanding": 78.6, "Reasoning": 77.4, "Language": 61.3, "Empathy": 79.0, "Robustness": 45.70, "Overall": 71.34},
53
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Fairness": 57.5, "Ethics": 94.6, "Understanding": 73.2, "Reasoning": 67.8, "Language": 57.7, "Empathy": 79.8, "Robustness": 58.30, "Overall": 69.84},
54
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Fairness": 53.1, "Ethics": 96.3, "Understanding": 67.5, "Reasoning": 74.4, "Language": 60.4, "Empathy": 68.0, "Robustness": 35.12, "Overall": 64.97},
55
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Fairness": 56.0, "Ethics": 96.1, "Understanding": 72.3, "Reasoning": 69.7, "Language": 57.3, "Empathy": 70.8, "Robustness": 50.50, "Overall": 67.53},
56
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Fairness": 52.4, "Ethics": 94.8, "Understanding": 66.2, "Reasoning": 65.8, "Language": 55.0, "Empathy": 58.8, "Robustness": 49.70, "Overall": 63.24},
57
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Fairness": 51.7, "Ethics": 94.9, "Understanding": 64.4, "Reasoning": 68.1, "Language": 50.8, "Empathy": 77.8, "Robustness": 45.90, "Overall": 64.80},
58
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Fairness": 50.9, "Ethics": 93.8, "Understanding": 63.8, "Reasoning": 64.4, "Language": 51.1, "Empathy": 74.5, "Robustness": 56.40, "Overall": 64.99},
59
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Fairness": 50.2, "Ethics": 96.9, "Understanding": 63.3, "Reasoning": 65.2, "Language": 57.6, "Empathy": 69.5, "Robustness": 52.80, "Overall": 65.07},
60
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Fairness": 50.2, "Ethics": 94.4, "Understanding": 63.9, "Reasoning": 63.0, "Language": 50.0, "Empathy": 67.8, "Robustness": 50.50, "Overall": 62.83},
61
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Fairness": 50.2, "Ethics": 94.9, "Understanding": 58.9, "Reasoning": 63.0, "Language": 50.7, "Empathy": 71.3, "Robustness": 56.70, "Overall": 63.67},
62
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Fairness": 48.8, "Ethics": 90.6, "Understanding": 54.8, "Reasoning": 61.6, "Language": 49.1, "Empathy": 59.3, "Robustness": 55.70, "Overall": 59.99},
63
  ]
64
 
65
  # ========================
 
67
  # T1–T7 per-model accuracy / scores
68
  # ========================
69
 
70
+ def _task_rows(extra_keys: list) -> list:
71
+ """Generate per-model rows with None scores for the given extra columns."""
72
+ return [
73
+ {"model": m["model"], "link": m["link"], **{k: None for k in extra_keys}}
74
+ for m in MODELS
75
+ ]
76
+
77
+ T1_COLS = ["Accuracy", "Bias", "Hallucination", "Faithfulness", "Context Rel.", "Coherence"]
78
+
79
+ # T1: Scene Understanding (Open-Ended VQA)
80
  T1_DATA = [
81
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Accuracy": 74.80, "Bias": 0.90, "Hallucination": 2.10, "Faithfulness": 76.50, "Context Rel.": 75.20, "Coherence": 75.80},
82
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Accuracy": 73.20, "Bias": 1.10, "Hallucination": 1.70, "Faithfulness": 75.90, "Context Rel.": 74.30, "Coherence": 74.80},
83
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Accuracy": 67.37, "Bias": 9.33, "Hallucination": 9.38, "Faithfulness": 67.92, "Context Rel.": 66.28, "Coherence": 66.40},
84
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Accuracy": 64.34, "Bias": 9.03, "Hallucination": 9.12, "Faithfulness": 65.33, "Context Rel.": 68.10, "Coherence": 66.90},
85
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Accuracy": 68.10, "Bias": 1.23, "Hallucination": 3.12, "Faithfulness": 72.38, "Context Rel.": 73.47, "Coherence": 73.20},
86
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Accuracy": 66.50, "Bias": 8.50, "Hallucination": 8.20, "Faithfulness": 70.10, "Context Rel.": 68.30, "Coherence": 69.00},
87
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Accuracy": 67.34, "Bias": 11.38, "Hallucination": 10.45, "Faithfulness": 69.01, "Context Rel.": 71.29, "Coherence": 69.80},
88
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Accuracy": 67.19, "Bias": 2.40, "Hallucination": 5.21, "Faithfulness": 67.45, "Context Rel.": 65.28, "Coherence": 65.90},
89
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Accuracy": 67.12, "Bias": 1.87, "Hallucination": 4.35, "Faithfulness": 64.78, "Context Rel.": 62.01, "Coherence": 62.60},
90
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Accuracy": 62.19, "Bias": 8.12, "Hallucination": 8.46, "Faithfulness": 68.84, "Context Rel.": 68.22, "Coherence": 68.00},
91
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Accuracy": 61.10, "Bias": 10.70, "Hallucination": 10.73, "Faithfulness": 65.71, "Context Rel.": 64.18, "Coherence": 64.20},
92
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Accuracy": 62.10, "Bias": 1.35, "Hallucination": 3.21, "Faithfulness": 69.26, "Context Rel.": 67.09, "Coherence": 67.50},
93
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Accuracy": 60.18, "Bias": 8.63, "Hallucination": 8.34, "Faithfulness": 69.98, "Context Rel.": 65.10, "Coherence": 65.40},
94
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Accuracy": 63.40, "Bias": 19.30, "Hallucination": 15.67, "Faithfulness": 62.09, "Context Rel.": 66.01, "Coherence": 64.30},
95
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Accuracy": 59.10, "Bias": 12.56, "Hallucination": 11.29, "Faithfulness": 62.14, "Context Rel.": 63.10, "Coherence": 63.00},
96
  ]
97
 
98
+ T2_COLS = ["Accuracy", "Bias", "Hallucination", "Faithfulness", "Context Rel.", "Coherence"]
99
+
100
+ # T2: Instance Identity (Open-Ended VQA)
101
  T2_DATA = [
102
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Accuracy": 68.10, "Bias": 1.50, "Hallucination": 3.00, "Faithfulness": 85.00, "Context Rel.": 85.00, "Coherence": 85.00},
103
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Accuracy": 66.50, "Bias": 2.00, "Hallucination": 4.00, "Faithfulness": 83.00, "Context Rel.": 82.00, "Coherence": 82.00},
104
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Accuracy": 62.37, "Bias": 10.21, "Hallucination": 6.27, "Faithfulness": 67.92, "Context Rel.": 68.65, "Coherence": 66.94},
105
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Accuracy": 59.34, "Bias": 9.82, "Hallucination": 10.01, "Faithfulness": 65.33, "Context Rel.": 66.10, "Coherence": 65.02},
106
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Accuracy": 63.10, "Bias": 2.07, "Hallucination": 4.08, "Faithfulness": 81.67, "Context Rel.": 82.21, "Coherence": 81.76},
107
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Accuracy": 61.94, "Bias": 15.19, "Hallucination": 5.00, "Faithfulness": 78.96, "Context Rel.": 75.00, "Coherence": 76.00},
108
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Accuracy": 62.34, "Bias": 12.31, "Hallucination": 6.53, "Faithfulness": 74.01, "Context Rel.": 70.14, "Coherence": 72.45},
109
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Accuracy": 62.19, "Bias": 3.39, "Hallucination": 6.19, "Faithfulness": 67.45, "Context Rel.": 68.34, "Coherence": 67.80},
110
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Accuracy": 57.19, "Bias": 9.02, "Hallucination": 9.39, "Faithfulness": 68.84, "Context Rel.": 67.74, "Coherence": 66.89},
111
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Accuracy": 62.12, "Bias": 2.83, "Hallucination": 5.44, "Faithfulness": 64.78, "Context Rel.": 67.33, "Coherence": 65.41},
112
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Accuracy": 56.10, "Bias": 11.74, "Hallucination": 11.69, "Faithfulness": 65.71, "Context Rel.": 64.49, "Coherence": 62.92},
113
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Accuracy": 57.10, "Bias": 2.16, "Hallucination": 4.24, "Faithfulness": 69.26, "Context Rel.": 71.82, "Coherence": 71.09},
114
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Accuracy": 55.18, "Bias": 9.59, "Hallucination": 9.18, "Faithfulness": 69.98, "Context Rel.": 65.73, "Coherence": 64.30},
115
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Accuracy": 54.10, "Bias": 13.48, "Hallucination": 12.41, "Faithfulness": 64.05, "Context Rel.": 63.12, "Coherence": 61.37},
116
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Accuracy": 58.40, "Bias": 20.42, "Hallucination": 16.72, "Faithfulness": 62.09, "Context Rel.": 60.04, "Coherence": 59.11},
117
  ]
118
 
119
+ T3_COLS = ["Accuracy", "Bias", "Hallucination", "Faithfulness", "Context Rel.", "Coherence"]
120
+
121
+ # T3: Multiple-Choice VQA
122
  T3_DATA = [
123
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Accuracy": 68.10, "Bias": 0.95, "Hallucination": 1.20, "Faithfulness": 82.30, "Context Rel.": 80.45, "Coherence": 73.90},
124
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Accuracy": 70.40, "Bias": 0.85, "Hallucination": 0.95, "Faithfulness": 81.60, "Context Rel.": 82.10, "Coherence": 74.60},
125
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Accuracy": 52.93, "Bias": 6.30, "Hallucination": 6.35, "Faithfulness": 69.22, "Context Rel.": 67.54, "Coherence": 66.63},
126
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Accuracy": 50.89, "Bias": 7.68, "Hallucination": 7.22, "Faithfulness": 64.77, "Context Rel.": 63.06, "Coherence": 62.25},
127
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Accuracy": 60.80, "Bias": 2.01, "Hallucination": 3.00, "Faithfulness": 76.55, "Context Rel.": 74.77, "Coherence": 73.86},
128
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Accuracy": 54.22, "Bias": 5.43, "Hallucination": 5.80, "Faithfulness": 71.14, "Context Rel.": 69.37, "Coherence": 68.46},
129
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Accuracy": 61.10, "Bias": 1.95, "Hallucination": 2.90, "Faithfulness": 77.20, "Context Rel.": 75.40, "Coherence": 74.50},
130
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Accuracy": 53.18, "Bias": 6.13, "Hallucination": 6.24, "Faithfulness": 69.98, "Context Rel.": 68.16, "Coherence": 67.26},
131
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Accuracy": 51.47, "Bias": 7.29, "Hallucination": 6.97, "Faithfulness": 66.02, "Context Rel.": 64.38, "Coherence": 63.56},
132
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Accuracy": 51.64, "Bias": 7.17, "Hallucination": 6.90, "Faithfulness": 67.33, "Context Rel.": 65.69, "Coherence": 64.74},
133
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Accuracy": 49.05, "Bias": 8.92, "Hallucination": 8.00, "Faithfulness": 61.01, "Context Rel.": 59.37, "Coherence": 58.53},
134
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Accuracy": 55.51, "Bias": 4.56, "Hallucination": 5.25, "Faithfulness": 72.33, "Context Rel.": 70.47, "Coherence": 69.53},
135
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Accuracy": 50.76, "Bias": 7.76, "Hallucination": 7.27, "Faithfulness": 63.26, "Context Rel.": 61.55, "Coherence": 60.73},
136
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Accuracy": 45.67, "Bias": 18.28, "Hallucination": 12.98, "Faithfulness": 52.02, "Context Rel.": 55.29, "Coherence": 54.39},
137
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Accuracy": 45.35, "Bias": 14.13, "Hallucination": 12.55, "Faithfulness": 54.21, "Context Rel.": 56.46, "Coherence": 54.52},
138
  ]
139
 
140
+ LANGUAGES = ["English", "French", "Spanish", "Portuguese", "Mandarin", "Korean", "Urdu", "Persian", "Bengali", "Punjabi", "Tamil"]
 
141
 
142
+ # T4: Multilingual VQA β€” Accuracy (%) per language
143
  T4_DATA = [
144
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "English": 64.6, "French": 64.0, "Spanish": 63.4, "Portuguese": 62.8, "Mandarin": 62.3, "Korean": 61.8, "Urdu": 60.1, "Persian": 59.7, "Bengali": 59.1, "Punjabi": 58.6, "Tamil": 58.1, "Avg": 61.32},
145
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "English": 64.4, "French": 63.8, "Spanish": 63.2, "Portuguese": 62.6, "Mandarin": 62.1, "Korean": 61.7, "Urdu": 60.0, "Persian": 59.5, "Bengali": 58.9, "Punjabi": 58.4, "Tamil": 58.0, "Avg": 61.15},
146
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "English": 59.2, "French": 58.6, "Spanish": 57.9, "Portuguese": 57.5, "Mandarin": 57.0, "Korean": 56.6, "Urdu": 55.1, "Persian": 54.6, "Bengali": 53.9, "Punjabi": 53.5, "Tamil": 53.1, "Avg": 56.09},
147
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "English": 56.8, "French": 56.4, "Spanish": 55.6, "Portuguese": 55.1, "Mandarin": 54.6, "Korean": 54.1, "Urdu": 52.8, "Persian": 52.4, "Bengali": 51.8, "Punjabi": 51.4, "Tamil": 51.0, "Avg": 53.82},
148
+ {"model": "Phi-4", "link": MODELS[4]["link"], "English": 63.3, "French": 62.8, "Spanish": 62.1, "Portuguese": 61.6, "Mandarin": 61.1, "Korean": 60.6, "Urdu": 58.9, "Persian": 58.5, "Bengali": 57.8, "Punjabi": 57.3, "Tamil": 56.9, "Avg": 60.08},
149
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "English": 59.5, "French": 59.0, "Spanish": 58.2, "Portuguese": 57.7, "Mandarin": 57.3, "Korean": 56.9, "Urdu": 55.3, "Persian": 54.9, "Bengali": 54.3, "Punjabi": 53.8, "Tamil": 53.3, "Avg": 56.38},
150
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "English": 61.6, "French": 61.3, "Spanish": 60.9, "Portuguese": 61.4, "Mandarin": 60.9, "Korean": 60.4, "Urdu": 58.7, "Persian": 58.3, "Bengali": 57.6, "Punjabi": 57.1, "Tamil": 56.6, "Avg": 59.53},
151
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "English": 59.1, "French": 58.6, "Spanish": 58.0, "Portuguese": 57.5, "Mandarin": 57.0, "Korean": 56.6, "Urdu": 55.1, "Persian": 54.6, "Bengali": 53.9, "Punjabi": 53.5, "Tamil": 53.1, "Avg": 56.09},
152
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "English": 56.1, "French": 55.6, "Spanish": 54.9, "Portuguese": 54.5, "Mandarin": 54.2, "Korean": 53.8, "Urdu": 52.5, "Persian": 52.1, "Bengali": 51.5, "Punjabi": 51.1, "Tamil": 50.7, "Avg": 53.36},
153
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "English": 55.8, "French": 55.0, "Spanish": 54.2, "Portuguese": 53.2, "Mandarin": 52.3, "Korean": 51.7, "Urdu": 51.3, "Persian": 51.7, "Bengali": 51.9, "Punjabi": 49.9, "Tamil": 49.1, "Avg": 52.37},
154
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "English": 53.9, "French": 53.1, "Spanish": 52.4, "Portuguese": 51.1, "Mandarin": 50.5, "Korean": 49.7, "Urdu": 49.3, "Persian": 49.9, "Bengali": 50.1, "Punjabi": 47.9, "Tamil": 47.3, "Avg": 50.47},
155
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "English": 58.5, "French": 58.1, "Spanish": 57.5, "Portuguese": 57.0, "Mandarin": 56.5, "Korean": 55.8, "Urdu": 54.5, "Persian": 54.1, "Bengali": 53.5, "Punjabi": 53.0, "Tamil": 52.6, "Avg": 55.55},
156
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "English": 53.3, "French": 52.7, "Spanish": 51.8, "Portuguese": 50.8, "Mandarin": 50.1, "Korean": 49.4, "Urdu": 49.0, "Persian": 49.5, "Bengali": 49.7, "Punjabi": 47.6, "Tamil": 47.2, "Avg": 50.10},
157
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "English": 51.9, "French": 51.5, "Spanish": 50.7, "Portuguese": 50.3, "Mandarin": 49.9, "Korean": 49.4, "Urdu": 48.0, "Persian": 47.6, "Bengali": 47.0, "Punjabi": 46.5, "Tamil": 46.1, "Avg": 49.00},
158
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "English": 52.8, "French": 52.2, "Spanish": 51.3, "Portuguese": 50.3, "Mandarin": 49.5, "Korean": 48.9, "Urdu": 48.5, "Persian": 48.9, "Bengali": 49.1, "Punjabi": 47.0, "Tamil": 46.6, "Avg": 49.55},
159
  ]
160
 
161
+ T5_COLS = ["mAP@0.5", "mAP@0.75", "Mean IoU", "Missing (%)"]
162
+
163
+ # T5: Visual Grounding (Table 9) — mAP values are %; Mean IoU is 0–1; Missing (%) = images with no predicted box
164
  T5_DATA = [
165
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "mAP@0.5": 63.46, "mAP@0.75": 40.32, "Mean IoU": 0.34, "Missing (%)": 72.73},
166
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "mAP@0.5": 56.51, "mAP@0.75": 52.15, "Mean IoU": 0.23, "Missing (%)": 0.00},
167
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "mAP@0.5": 98.43, "mAP@0.75": 94.16, "Mean IoU": 0.90, "Missing (%)": 0.00},
168
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "mAP@0.5": 96.49, "mAP@0.75": 82.44, "Mean IoU": 0.78, "Missing (%)": 0.00},
169
+ {"model": "Phi-4", "link": MODELS[4]["link"], "mAP@0.5": 72.11, "mAP@0.75": 46.18, "Mean IoU": 0.47, "Missing (%)": 0.00},
170
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "mAP@0.5": 56.34, "mAP@0.75": 54.23, "Mean IoU": 0.49, "Missing (%)": 16.34},
171
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "mAP@0.5": 50.88, "mAP@0.75": 50.42, "Mean IoU": 0.10, "Missing (%)": 0.00},
172
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "mAP@0.5": 63.45, "mAP@0.75": 58.35, "Mean IoU": 0.37, "Missing (%)": 0.00},
173
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "mAP@0.5": 43.32, "mAP@0.75": 34.34, "Mean IoU": 0.45, "Missing (%)": 0.00},
174
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "mAP@0.5": 54.15, "mAP@0.75": 41.26, "Mean IoU": 0.07, "Missing (%)": 0.00},
175
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "mAP@0.5": 56.39, "mAP@0.75": 36.52, "Mean IoU": 0.22, "Missing (%)": 6.67},
176
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "mAP@0.5": 50.18, "mAP@0.75": 10.04, "Mean IoU": 0.14, "Missing (%)": 2.80},
177
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "mAP@0.5": 52.20, "mAP@0.75": 35.55, "Mean IoU": 0.12, "Missing (%)": 4.21},
178
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "mAP@0.5": 38.34, "mAP@0.75": 35.53, "Mean IoU": 0.25, "Missing (%)": 32.24},
179
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "mAP@0.5": 25.34, "mAP@0.75": 21.23, "Mean IoU": 0.14, "Missing (%)": 5.35},
180
  ]
181
 
182
+ T6_COLS = ["Empathy", "Anxiety", "Sadness", "Joy"]
183
+
184
+ # T6: Empathetic Captioning (Table 10) — LLM-judge rubric, 0–100
185
  T6_DATA = [
186
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Empathy": 95, "Anxiety": 15, "Sadness": 12, "Joy": 94},
187
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Empathy": 92, "Anxiety": 13, "Sadness": 11, "Joy": 90},
188
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Empathy": 68, "Anxiety": 25, "Sadness": 14, "Joy": 66},
189
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Empathy": 70, "Anxiety": 37, "Sadness": 36, "Joy": 68},
190
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Empathy": 83, "Anxiety": 22, "Sadness": 25, "Joy": 80},
191
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Empathy": 84, "Anxiety": 23, "Sadness": 24, "Joy": 82},
192
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Empathy": 76, "Anxiety": 44, "Sadness": 33, "Joy": 73},
193
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Empathy": 70, "Anxiety": 28, "Sadness": 27, "Joy": 68},
194
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Empathy": 60, "Anxiety": 47, "Sadness": 36, "Joy": 58},
195
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Empathy": 72, "Anxiety": 12, "Sadness": 19, "Joy": 70},
196
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Empathy": 72, "Anxiety": 20, "Sadness": 24, "Joy": 70},
197
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Empathy": 66, "Anxiety": 32, "Sadness": 20, "Joy": 64},
198
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Empathy": 74, "Anxiety": 42, "Sadness": 31, "Joy": 70},
199
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Empathy": 78, "Anxiety": 46, "Sadness": 25, "Joy": 68},
200
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Empathy": 68, "Anxiety": 59, "Sadness": 39, "Joy": 67},
201
  ]
202
 
203
+ T7_COLS = ["Clean Acc.", "Perturbated Acc.", "Retention (%)"]
204
+
205
+ # T7: Model Robustness under Perturbations (Table 11) — Retention = Perturbated / Clean × 100
206
  T7_DATA = [
207
+ {"model": "GPT-4o", "link": MODELS[0]["link"], "Clean Acc.": 65.85, "Perturbated Acc.": 40.80, "Retention (%)": 61.96},
208
+ {"model": "Gemini-2.0-Flash", "link": MODELS[1]["link"], "Clean Acc.": 60.40, "Perturbated Acc.": 39.00, "Retention (%)": 64.57},
209
+ {"model": "Qwen-2.5-7B", "link": MODELS[2]["link"], "Clean Acc.": 93.84, "Perturbated Acc.": 70.01, "Retention (%)": 74.63},
210
+ {"model": "LLaVA-v1.6", "link": MODELS[3]["link"], "Clean Acc.": 87.50, "Perturbated Acc.": 67.36, "Retention (%)": 77.53},
211
+ {"model": "Phi-4", "link": MODELS[4]["link"], "Clean Acc.": 72.05, "Perturbated Acc.": 44.43, "Retention (%)": 61.67},
212
+ {"model": "Gemma-3", "link": MODELS[5]["link"], "Clean Acc.": 73.10, "Perturbated Acc.": 51.75, "Retention (%)": 70.82},
213
+ {"model": "CogVLM2-19B", "link": MODELS[6]["link"], "Clean Acc.": 54.00, "Perturbated Acc.": 34.50, "Retention (%)": 63.89},
214
+ {"model": "Phi-3.5", "link": MODELS[7]["link"], "Clean Acc.": 67.25, "Perturbated Acc.": 42.00, "Retention (%)": 62.45},
215
+ {"model": "Molmo-7V", "link": MODELS[8]["link"], "Clean Acc.": 71.15, "Perturbated Acc.": 45.50, "Retention (%)": 63.96},
216
+ {"model": "Aya-Vision-8B", "link": MODELS[9]["link"], "Clean Acc.": 59.50, "Perturbated Acc.": 32.20, "Retention (%)": 54.03},
217
+ {"model": "InternVL2.5", "link": MODELS[10]["link"], "Clean Acc.": 59.80, "Perturbated Acc.": 37.75, "Retention (%)": 63.12},
218
+ {"model": "Janus-Pro-7B", "link": MODELS[11]["link"], "Clean Acc.": 55.60, "Perturbated Acc.": 31.85, "Retention (%)": 57.31},
219
+ {"model": "GLM-4V-9B", "link": MODELS[12]["link"], "Clean Acc.": 54.75, "Perturbated Acc.": 29.85, "Retention (%)": 54.52},
220
+ {"model": "Llama-3.2-11B", "link": MODELS[13]["link"], "Clean Acc.": 62.15, "Perturbated Acc.": 40.25, "Retention (%)": 64.74},
221
+ {"model": "DeepSeek-VL2-Small", "link": MODELS[14]["link"], "Clean Acc.": 55.90, "Perturbated Acc.": 33.60, "Retention (%)": 60.11},
222
  ]
223
 
224
 
 
256
  <div class="stat-label">Image–Question Pairs</div>
257
  </div>
258
  <div class="stat-box">
259
+ <div class="stat-value">~1,500</div>
260
  <div class="stat-label">Unique Images</div>
261
  </div>
262
  <div class="stat-box">
 
284
  ### Dataset Overview
285
 
286
  - **32,000+ expert-verified** image–question pairs from real-world news imagery
287
+ - **~1,500 unique images** spanning diverse social contexts
288
  - **7 evaluation tasks** (T1–T7) covering scene understanding, identity, reasoning, language, grounding, empathy, and robustness
289
  - **7 HC principles**: Fairness, Ethics, Understanding, Reasoning, Language, Empathy, Robustness
290
  - **5 social attributes**: Age, Gender, Race, Occupation, Sports
 
331
 
332
  ### Contact
333
 
334
+ - **Email:** [shaina.raza@vectorinstitute.ai](mailto:shaina.raza@vectorinstitute.ai)
335
  - **Website:** [{WEBSITE_URL}]({WEBSITE_URL})
336
  - **Dataset:** [HuggingFace]({DATASET_URL})
337
  - **Code:** [GitHub]({GITHUB_URL})
 
346
  # TABLE BUILDERS
347
  # ========================
348
 
349
+ def _make_df(data: list, score_cols: list, pct: bool = True) -> pd.DataFrame:
350
  rows = []
351
  for item in data:
352
  row = {"Model": make_clickable_model(item["model"], item.get("link"))}
 
381
  )
382
 
383
 
384
+ def build_task_leaderboard(task_data: list, score_cols: list, pct: bool = True):
385
  df = _make_df(task_data, score_cols, pct=pct)
386
  return gr.Dataframe(
387
  value=df,
 
392
  )
393
 
394
 
395
+ def build_vqa_leaderboard(task_data: list):
396
+ cols = ["Accuracy", "Bias", "Hallucination", "Faithfulness", "Context Rel.", "Coherence"]
397
+ return build_task_leaderboard(task_data, cols, pct=True)
398
 
399
 
400
  def build_multilingual_leaderboard():
 
413
  <div id="page-header">
414
  <div id="header-container">
415
  <div id="left-container">
416
+ <a href="https://vectorinstitute.ai" target="_blank" rel="noopener noreferrer">
417
+ <img id="vector-logo" src="/gradio_api/file={vector_logo_path}"
418
+ alt="Vector Institute" onerror="this.style.display='none'">
419
+ </a>
420
  </div>
421
  <div id="centre-container">
422
  <h1>HumaniBench Leaderboard</h1>
423
  <p>A Human-Centric Evaluation Framework for Large Multimodal Models</p>
424
  </div>
425
+ <div id="right-container">
426
+ <img id="humanibench-logo" src="/gradio_api/file={humanibench_logo_path}"
427
+ alt="HumaniBench" onerror="this.style.display='none'">
428
+ </div>
429
  </div>
430
  </div>
431
  """)
432
 
433
  gr.HTML(INTRODUCTION_HTML)
434
 
435
+ gr.HTML("""
436
+ <div style="text-align: center; margin: 1.5rem auto; max-width: 960px;">
437
+ <img src="/gradio_api/file=src/assets/teaser_figure_humanibench.png"
438
+ style="width: 100%; border-radius: 8px; box-shadow: 0 2px 12px rgba(0,0,0,0.12);"
439
+ alt="HumaniBench teaser figure">
440
+ </div>
441
+ """)
442
+
443
  with gr.Tabs():
444
 
445
  # ── Tab 1: Overall Rankings ──────────────────────────────────────────
 
452
  </div>
453
  """, elem_classes="markdown-text")
454
  build_overall_leaderboard()
455
+ gr.Markdown("*Overall = mean of all 7 principle scores. -- indicates data not yet available.*")
456
 
457
  # ── Tab 2: Task Results ──────────────────────────────────────────────
458
  with gr.Tab("Task Results"):
 
465
 
466
  with gr.Tabs():
467
  with gr.Tab("T1 Β· Scene Understanding"):
468
+ gr.Markdown("**Metrics:** Accuracy (%) · Bias · Hallucination · Faithfulness · Context Rel. · Coherence")
469
+ build_vqa_leaderboard(T1_DATA)
470
 
471
  with gr.Tab("T2 Β· Instance Identity"):
472
+ gr.Markdown("**Metrics:** Accuracy (%) · Bias · Hallucination · Faithfulness · Context Rel. · Coherence")
473
+ build_vqa_leaderboard(T2_DATA)
474
 
475
  with gr.Tab("T3 Β· MC-VQA"):
476
+ gr.Markdown("**Metrics:** Accuracy (%) · Bias · Hallucination · Faithfulness · Context Rel. · Coherence")
477
+ build_vqa_leaderboard(T3_DATA)
478
 
479
  with gr.Tab("T4 Β· Multilingual"):
480
+ gr.Markdown("**Metric:** Accuracy (%) across 11 languages · Avg = macro-average")
481
  build_multilingual_leaderboard()
482
 
483
  with gr.Tab("T5 Β· Visual Grounding"):
484
+ gr.Markdown("**Metrics:** `mAP@0.5` (%) · `mAP@0.75` (%) · Mean IoU (0–1) · Missing Pred. (%) ↓")
485
+ build_task_leaderboard(T5_DATA, T5_COLS, pct=False)
486
 
487
  with gr.Tab("T6 Β· Empathetic Captioning"):
488
+ gr.Markdown("**Metrics:** Empathy · Anxiety · Sadness · Joy (LLM-judge rubric, 0–100)")
489
+ build_task_leaderboard(T6_DATA, T6_COLS, pct=False)
490
 
491
  with gr.Tab("T7 Β· Image Resilience"):
492
+ gr.Markdown("**Metrics:** Clean Acc. (%) · Perturbated Acc. (%) · Retention (%) = Perturbated / Clean × 100")
493
+ build_task_leaderboard(T7_DATA, T7_COLS, pct=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
 
495
+ # ── Tab 3: About ─────────────────────────────────────────────────────
496
  with gr.Tab("About"):
497
  gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
498
 
 
510
 
511
 
512
  if __name__ == "__main__":
513
+ demo.launch(allowed_paths=["src/assets"])
 
 
src/assets/HumaniBenchLogo.ico ADDED
src/assets/teaser_figure_humanibench.png ADDED

Git LFS Details

  • SHA256: 156f9c7e5b16e0cea48edea2c0a72c93400e5eef00959896013a772681b849ad
  • Pointer size: 132 Bytes
  • Size of remote file: 1.13 MB
src/assets/vector-favicon-48x48.svg ADDED
src/display/css_html_js.py CHANGED
@@ -11,23 +11,25 @@ function tableLinkHack() {
11
 
12
  custom_css = """
13
  :root {
14
- --primary-color: #2563eb;
15
- --secondary-color: #7c3aed;
16
- --text-color: #1e293b;
17
- --text-secondary: #64748b;
18
- --border-color: #e2e8f0;
19
- --hover-bg: #f8fafc;
20
- --link-color: #2563eb;
 
 
 
21
  }
22
 
23
  * {
24
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
25
  }
26
 
27
- body {
28
- background-color: #ffffff;
29
- color: var(--text-color);
30
- font-size: 16px;
31
  }
32
 
33
  .gradio-container {
@@ -36,15 +38,14 @@ body {
36
  padding: 0 30px;
37
  }
38
 
39
- footer {
40
- visibility: hidden;
41
- }
42
 
 
43
  #page-header {
44
  text-align: center;
45
- padding: 3rem 2rem 2rem 2rem;
46
  margin-bottom: 2rem;
47
- border-bottom: 2px solid var(--border-color);
48
  }
49
 
50
  #header-container {
@@ -55,83 +56,80 @@ footer {
55
  margin: 0 auto;
56
  }
57
 
58
- #left-container {
59
- flex: 0 0 auto;
60
- }
61
-
62
- #centre-container {
63
- flex: 1;
64
- text-align: center;
65
- }
66
-
67
  #right-container {
68
  flex: 0 0 auto;
69
  width: 150px;
 
 
 
70
  }
71
 
72
  #page-header h1 {
73
  font-size: 3rem;
74
  font-weight: 700;
75
- color: var(--text-color);
76
  margin: 0 0 0.5rem 0;
77
  }
78
 
79
  #page-header p {
80
- font-size: 1.4rem;
81
- color: var(--text-secondary);
82
  margin: 0;
83
  }
84
 
85
- #left-container #black-logo,
86
- #left-container #white-logo {
87
- height: 150px;
88
- width: 150px;
 
89
  }
90
 
91
- #left-container #black-logo {
92
- display: block;
93
- }
94
-
95
- #left-container #white-logo {
96
- display: none;
97
  }
98
 
 
99
  .stats-container {
100
  display: grid;
101
- grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
102
- gap: 2rem;
103
  max-width: 1300px;
104
  margin: 2rem auto;
105
  padding: 0 1rem;
106
  }
107
 
108
  .stat-box {
109
- background: white;
110
- border: 2px solid var(--border-color);
111
- border-radius: 8px;
112
  padding: 1.5rem;
113
  text-align: center;
114
- transition: transform 0.2s, box-shadow 0.2s;
115
  }
116
 
117
  .stat-box:hover {
118
- transform: translateY(-2px);
119
- box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
120
  }
121
 
122
  .stat-value {
123
- font-size: 3rem;
124
  font-weight: 700;
125
- color: var(--primary-color);
126
- margin-bottom: 0.5rem;
127
  }
128
 
129
  .stat-label {
130
- font-size: 1.1rem;
131
- color: var(--text-secondary);
132
  font-weight: 500;
133
  }
134
 
 
135
  .badges-container {
136
  display: flex;
137
  justify-content: center;
@@ -140,183 +138,221 @@ footer {
140
  flex-wrap: wrap;
141
  }
142
 
143
- .badges-container img {
144
- height: 22px;
145
- }
146
 
 
147
  .tab-nav {
148
- border-bottom: 2px solid var(--border-color);
149
  margin-bottom: 2rem;
150
  }
151
 
152
  .tab-nav button {
153
- font-size: 1.15rem;
154
  font-weight: 600;
155
- padding: 0.85rem 1.75rem;
156
  border: none;
157
  background: transparent;
158
- color: var(--text-secondary);
159
- border-bottom: 3px solid transparent;
160
  transition: all 0.2s;
161
  }
162
 
163
  .tab-nav button:hover {
164
- color: var(--text-color);
165
- background-color: var(--hover-bg);
166
  }
167
 
168
  .tab-nav button[aria-selected="true"] {
169
- color: var(--primary-color);
170
- border-bottom-color: var(--primary-color);
171
- background-color: transparent;
 
 
 
 
 
 
 
 
172
  }
173
 
174
  .humani-leaderboard-table .table-wrap table.table {
175
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
176
- color: rgb(97, 97, 97);
177
- overflow-y: auto;
178
- overflow-x: auto;
179
  width: 100%;
180
- table-layout: fixed;
 
 
181
  }
182
 
183
  .humani-leaderboard-table .table-wrap table.table a {
184
- font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
185
- color: var(--link-color);
186
- font-weight: 700;
187
  text-decoration: none;
188
- border-bottom: 1px dotted var(--link-color);
189
  }
190
 
191
  .humani-leaderboard-table .table-wrap table.table a:hover {
192
- color: var(--secondary-color);
193
- border-bottom-style: solid;
194
  }
195
 
196
- .humani-leaderboard-table .table-wrap table.table tr td,
197
- .humani-leaderboard-table .table-wrap table.table tr th {
198
- border-bottom: 1px solid var(--border-color-primary);
199
- padding: 1rem 0.8rem;
 
 
 
 
 
 
 
 
 
200
  text-align: center;
201
- white-space: normal;
202
- word-wrap: break-word;
203
  }
204
 
205
- .humani-leaderboard-table .table-wrap table.table th:nth-child(1),
206
- .humani-leaderboard-table .table-wrap table.table td:nth-child(1) {
207
- width: 18% !important;
208
- min-width: 18% !important;
209
- max-width: 18% !important;
210
  }
211
 
212
- .humani-leaderboard-table .table-wrap table.table th:nth-child(2),
213
- .humani-leaderboard-table .table-wrap table.table td:nth-child(2) {
214
- width: 12% !important;
215
- min-width: 12% !important;
216
- max-width: 12% !important;
217
  }
218
 
219
- .humani-leaderboard-table .table-wrap table.table th {
220
- font-size: 0.95rem;
221
- font-weight: 700;
222
- text-transform: uppercase;
223
- letter-spacing: 0.04em;
 
 
 
 
 
224
  }
225
 
226
  .humani-leaderboard-table .table-wrap table.table tbody td {
227
- font-size: 1.05rem;
228
  font-weight: 500;
 
 
 
 
 
 
 
 
 
 
 
 
229
  }
230
 
 
231
  .info-box {
232
- background-color: #eff6ff;
233
- border-left: 4px solid var(--primary-color);
234
- padding: 1.2rem 1.5rem;
235
  border-radius: 4px;
236
  margin: 1.5rem auto;
237
- font-size: 1.05rem;
238
  max-width: 1400px;
 
239
  }
240
 
241
  .info-box h3 {
242
- color: var(--primary-color);
243
  margin-top: 0;
244
- font-size: 1.25rem;
245
  }
246
 
247
  .warning-box {
248
- background-color: #fff7ed;
249
- border-left: 4px solid #f97316;
250
- padding: 1.2rem 1.5rem;
251
  border-radius: 4px;
252
  margin: 1.5rem auto;
253
- font-size: 1.05rem;
254
  max-width: 1400px;
 
255
  }
256
 
257
  .warning-box h3 {
258
- color: #f97316;
259
  margin-top: 0;
260
- font-size: 1.25rem;
261
  }
262
 
 
263
  .markdown-text {
264
  line-height: 1.75;
265
- color: var(--text-color);
266
  max-width: 1400px !important;
267
  margin: 0 auto;
268
- font-size: 1.05rem;
269
  padding: 0 2rem;
270
  }
271
 
272
  .markdown-text h2 {
273
- color: var(--primary-color);
274
- font-size: 2rem;
275
  margin-top: 2.5rem;
276
  margin-bottom: 1rem;
277
- padding-bottom: 0.5rem;
278
- border-bottom: 2px solid var(--border-color);
279
  }
280
 
281
  .markdown-text h3 {
282
- color: var(--text-color);
283
- font-size: 1.5rem;
284
  margin-top: 1.75rem;
285
  margin-bottom: 0.75rem;
286
  font-weight: 600;
287
  }
288
 
289
- .markdown-text p {
290
- margin-bottom: 1rem;
291
- font-size: 1.05rem;
292
- }
293
 
294
  .markdown-text ul, .markdown-text ol {
295
  margin-bottom: 1rem;
296
  padding-left: 1.5rem;
297
  }
298
 
299
- .markdown-text li {
300
- margin-bottom: 0.5rem;
301
- font-size: 1.05rem;
302
- }
303
 
304
  .markdown-text code {
305
- background-color: #f8fafc;
306
  padding: 0.2rem 0.4rem;
307
  border-radius: 3px;
308
- font-size: 0.95em;
309
- border: 1px solid var(--border-color);
 
310
  }
311
 
312
  .markdown-text pre {
313
- background-color: #f8fafc;
314
  padding: 1rem;
315
  border-radius: 6px;
316
  overflow-x: auto;
317
- border: 1px solid var(--border-color);
318
  margin: 1rem 0;
319
- font-size: 0.95rem;
 
 
 
 
 
 
 
 
320
  }
321
 
322
  .markdown-text table {
@@ -327,457 +363,66 @@ footer {
327
 
328
  .markdown-text table th,
329
  .markdown-text table td {
330
- padding: 0.75rem;
331
  text-align: left;
332
- border: 1px solid var(--border-color);
 
333
  }
334
 
335
  .markdown-text table th {
336
- background-color: #f8fafc;
337
  font-weight: 600;
 
338
  }
339
 
340
  .markdown-text table tr:nth-child(even) {
341
- background-color: #fafbfc;
342
  }
343
 
 
344
  #footer {
345
  text-align: center;
346
  padding: 2.5rem 1rem;
347
  margin-top: 4rem;
348
- border-top: 2px solid var(--border-color);
349
- color: var(--text-secondary);
350
  font-size: 1rem;
351
  }
352
 
353
- #footer p {
354
- margin: 0.5rem 0;
355
- }
356
 
357
  #footer a {
358
- color: var(--link-color);
359
  text-decoration: none;
360
  font-weight: 600;
361
  margin: 0 0.5rem;
362
  }
363
 
364
- #footer a:hover {
365
- text-decoration: underline;
366
- }
367
-
368
- @media (prefers-color-scheme: dark) {
369
- :root {
370
- --primary-color: #60a5fa;
371
- --secondary-color: #a78bfa;
372
- --text-color: #f1f5f9;
373
- --text-secondary: #cbd5e1;
374
- --border-color: #334155;
375
- --hover-bg: #1e293b;
376
- --link-color: #60a5fa;
377
- --bg-primary: #0f172a;
378
- --bg-secondary: #1e293b;
379
- --bg-tertiary: #334155;
380
- }
381
-
382
- body {
383
- background-color: var(--bg-primary) !important;
384
- color: var(--text-color) !important;
385
- }
386
-
387
- .gradio-container {
388
- background-color: var(--bg-primary) !important;
389
- max-width: 1600px !important;
390
- }
391
-
392
- #page-header {
393
- border-bottom-color: var(--border-color);
394
- }
395
-
396
- #page-header h1 {
397
- color: #ffffff !important;
398
- font-size: 3.5rem !important;
399
- }
400
-
401
- #page-header p {
402
- color: var(--text-secondary) !important;
403
- font-size: 1.5rem !important;
404
- }
405
-
406
- #left-container #black-logo {
407
- display: none;
408
- }
409
-
410
- #left-container #white-logo {
411
- display: block;
412
- }
413
-
414
- .stat-box {
415
- background: var(--bg-secondary) !important;
416
- border-color: var(--border-color) !important;
417
- }
418
-
419
- .stat-box:hover {
420
- background: var(--bg-tertiary) !important;
421
- box-shadow: 0 4px 12px rgba(96, 165, 250, 0.3) !important;
422
- }
423
-
424
- .stat-value {
425
- color: #60a5fa !important;
426
- font-size: 3.5rem !important;
427
- }
428
-
429
- .stat-label {
430
- color: #94a3b8 !important;
431
- font-size: 1.15rem !important;
432
- }
433
-
434
- .tab-nav button {
435
- color: #94a3b8 !important;
436
- background: transparent !important;
437
- font-size: 1.25rem !important;
438
- }
439
-
440
- .tab-nav button:hover {
441
- color: #ffffff !important;
442
- background-color: var(--hover-bg) !important;
443
- }
444
-
445
- .tab-nav button[aria-selected="true"] {
446
- color: #60a5fa !important;
447
- border-bottom-color: #60a5fa !important;
448
- background: transparent !important;
449
- }
450
-
451
- .humani-leaderboard-table .table-wrap table.table {
452
- color: #ffffff;
453
- }
454
-
455
- .humani-leaderboard-table .table-wrap table.table a {
456
- color: #60a5fa !important;
457
- }
458
-
459
- .humani-leaderboard-table .table-wrap table.table a:hover {
460
- color: #93c5fd !important;
461
- }
462
-
463
- .humani-leaderboard-table .table-wrap table.table tr th {
464
- color: #94a3b8 !important;
465
- border-bottom: 2px solid #60a5fa !important;
466
- }
467
-
468
- .humani-leaderboard-table .table-wrap table.table tr td {
469
- color: #f1f5f9 !important;
470
- }
471
-
472
- .info-box {
473
- background-color: rgba(96, 165, 250, 0.1) !important;
474
- border-left-color: #60a5fa !important;
475
- color: #f1f5f9 !important;
476
- font-size: 1.1rem !important;
477
- }
478
-
479
- .info-box h3 {
480
- color: #60a5fa !important;
481
- font-size: 1.35rem !important;
482
- }
483
-
484
- .warning-box {
485
- background-color: rgba(251, 146, 60, 0.1) !important;
486
- border-left-color: #fb923c !important;
487
- color: #f1f5f9 !important;
488
- font-size: 1.1rem !important;
489
- }
490
-
491
- .warning-box h3 {
492
- color: #fb923c !important;
493
- font-size: 1.35rem !important;
494
- }
495
-
496
- .markdown-text {
497
- color: #f1f5f9 !important;
498
- font-size: 1.1rem !important;
499
- max-width: 1400px !important;
500
- }
501
-
502
- .markdown-text h2 {
503
- color: #60a5fa !important;
504
- border-bottom-color: var(--border-color) !important;
505
- font-size: 2.25rem !important;
506
- }
507
-
508
- .markdown-text h3 {
509
- color: #ffffff !important;
510
- font-size: 1.65rem !important;
511
- }
512
-
513
- .markdown-text p, .markdown-text li {
514
- color: #cbd5e1 !important;
515
- font-size: 1.1rem !important;
516
- }
517
-
518
- .markdown-text a {
519
- color: #60a5fa !important;
520
- font-weight: 600 !important;
521
- }
522
-
523
- .markdown-text a:hover {
524
- color: #93c5fd !important;
525
- }
526
-
527
- .markdown-text code {
528
- background-color: var(--bg-tertiary) !important;
529
- border-color: var(--border-color) !important;
530
- color: #f1f5f9 !important;
531
- }
532
-
533
- .markdown-text pre {
534
- background-color: var(--bg-secondary) !important;
535
- border-color: var(--border-color) !important;
536
- }
537
-
538
- .markdown-text table th,
539
- .markdown-text table td {
540
- border-color: var(--border-color) !important;
541
- color: #f1f5f9 !important;
542
- }
543
-
544
- .markdown-text table th {
545
- background-color: var(--bg-secondary) !important;
546
- }
547
-
548
- .markdown-text table tr:nth-child(even) {
549
- background-color: var(--bg-secondary) !important;
550
- }
551
-
552
- #footer {
553
- border-top-color: var(--border-color) !important;
554
- color: #94a3b8 !important;
555
- }
556
-
557
- #footer a {
558
- color: #60a5fa !important;
559
- }
560
-
561
- #footer a:hover {
562
- color: #a78bfa !important;
563
- }
564
- }
565
-
566
- /* Explicit dark mode for HuggingFace Spaces */
567
- .dark,
568
- .dark .gradio-container,
569
- body.dark {
570
- --primary-color: #60a5fa;
571
- --secondary-color: #a78bfa;
572
- --text-color: #f1f5f9;
573
- --text-secondary: #cbd5e1;
574
- --border-color: #334155;
575
- --hover-bg: #1e293b;
576
- --link-color: #60a5fa;
577
- --bg-primary: #0f172a;
578
- --bg-secondary: #1e293b;
579
- --bg-tertiary: #334155;
580
- background-color: var(--bg-primary) !important;
581
- color: var(--text-color) !important;
582
- }
583
-
584
- .dark #page-header {
585
- border-bottom-color: var(--border-color);
586
- }
587
-
588
- .dark #page-header h1 {
589
- color: #ffffff !important;
590
- font-size: 3.5rem !important;
591
- }
592
-
593
- .dark #page-header p {
594
- color: var(--text-secondary) !important;
595
- font-size: 1.5rem !important;
596
- }
597
-
598
- .dark #left-container #black-logo {
599
- display: none;
600
- }
601
-
602
- .dark #left-container #white-logo {
603
- display: block;
604
- }
605
-
606
- .dark .stat-box {
607
- background: var(--bg-secondary) !important;
608
- border-color: var(--border-color) !important;
609
- }
610
-
611
- .dark .stat-box:hover {
612
- background: var(--bg-tertiary) !important;
613
- box-shadow: 0 4px 12px rgba(96, 165, 250, 0.3) !important;
614
- }
615
-
616
- .dark .stat-value {
617
- color: #60a5fa !important;
618
- font-size: 3.5rem !important;
619
- }
620
-
621
- .dark .stat-label {
622
- color: #94a3b8 !important;
623
- font-size: 1.15rem !important;
624
- }
625
-
626
- .dark .tab-nav button {
627
- color: #94a3b8 !important;
628
- background: transparent !important;
629
- font-size: 1.25rem !important;
630
- }
631
-
632
- .dark .tab-nav button:hover {
633
- color: #ffffff !important;
634
- background-color: var(--hover-bg) !important;
635
- }
636
-
637
- .dark .tab-nav button[aria-selected="true"] {
638
- color: #60a5fa !important;
639
- border-bottom-color: #60a5fa !important;
640
- background: transparent !important;
641
- }
642
-
643
- .dark .humani-leaderboard-table .table-wrap table.table {
644
- color: #f1f5f9;
645
- }
646
-
647
- .dark .humani-leaderboard-table .table-wrap table.table a {
648
- color: #60a5fa !important;
649
- }
650
-
651
- .dark .humani-leaderboard-table .table-wrap table.table a:hover {
652
- color: #93c5fd !important;
653
- }
654
-
655
- .dark .humani-leaderboard-table .table-wrap table.table tr th {
656
- color: #94a3b8 !important;
657
- border-bottom: 2px solid #60a5fa !important;
658
- }
659
-
660
- .dark .humani-leaderboard-table .table-wrap table.table tr td {
661
- color: #f1f5f9 !important;
662
- }
663
-
664
- .dark .info-box {
665
- background-color: rgba(96, 165, 250, 0.1) !important;
666
- border-left-color: #60a5fa !important;
667
- color: #f1f5f9 !important;
668
- }
669
-
670
- .dark .info-box h3 {
671
- color: #60a5fa !important;
672
- }
673
-
674
- .dark .warning-box {
675
- background-color: rgba(251, 146, 60, 0.1) !important;
676
- border-left-color: #fb923c !important;
677
- color: #f1f5f9 !important;
678
- }
679
-
680
- .dark .warning-box h3 {
681
- color: #fb923c !important;
682
- }
683
-
684
- .dark .markdown-text {
685
- color: #f1f5f9 !important;
686
- }
687
-
688
- .dark .markdown-text h2 {
689
- color: #60a5fa !important;
690
- border-bottom-color: var(--border-color) !important;
691
- }
692
-
693
- .dark .markdown-text h3 {
694
- color: #ffffff !important;
695
- }
696
-
697
- .dark .markdown-text p,
698
- .dark .markdown-text li {
699
- color: #cbd5e1 !important;
700
- }
701
-
702
- .dark .markdown-text a {
703
- color: #60a5fa !important;
704
- }
705
-
706
- .dark .markdown-text a:hover {
707
- color: #93c5fd !important;
708
- }
709
-
710
- .dark .markdown-text code {
711
- background-color: var(--bg-tertiary) !important;
712
- border-color: var(--border-color) !important;
713
- color: #f1f5f9 !important;
714
- }
715
-
716
- .dark .markdown-text pre {
717
- background-color: var(--bg-secondary) !important;
718
- border-color: var(--border-color) !important;
719
- }
720
-
721
- .dark .markdown-text table th,
722
- .dark .markdown-text table td {
723
- border-color: var(--border-color) !important;
724
- color: #f1f5f9 !important;
725
- }
726
-
727
- .dark .markdown-text table th {
728
- background-color: var(--bg-secondary) !important;
729
- }
730
-
731
- .dark .markdown-text table tr:nth-child(even) {
732
- background-color: var(--bg-secondary) !important;
733
- }
734
-
735
- .dark #footer {
736
- border-top-color: var(--border-color) !important;
737
- color: #94a3b8 !important;
738
- }
739
-
740
- .dark #footer a {
741
- color: #60a5fa !important;
742
- }
743
-
744
- .dark #footer a:hover {
745
- color: #a78bfa !important;
746
- }
747
 
 
748
  @media (max-width: 768px) {
749
- .gradio-container {
750
- padding: 0 15px !important;
751
- }
752
 
753
- #header-container {
754
- flex-direction: column;
755
- }
756
 
757
- #left-container,
758
- #right-container {
759
  width: 100%;
760
  text-align: center;
 
761
  }
762
 
763
- #left-container #black-logo,
764
- #left-container #white-logo {
765
- height: 100px;
766
- width: 100px;
767
- margin-bottom: 1rem;
768
- }
769
-
770
- #page-header h1 {
771
- font-size: 1.75rem !important;
772
  }
773
 
774
- #page-header p {
775
- font-size: 1.1rem !important;
776
- }
777
 
778
- .stat-value {
779
- font-size: 2rem !important;
780
- }
781
 
782
  .stats-container {
783
  grid-template-columns: repeat(2, 1fr);
 
11
 
12
  custom_css = """
13
  :root {
14
+ --accent: #EB088A;
15
+ --accent-dim: rgba(235, 8, 138, 0.12);
16
+ --accent-glow: rgba(235, 8, 138, 0.25);
17
+ --bg-0: #000000;
18
+ --bg-1: #0d0d0d;
19
+ --bg-2: #161616;
20
+ --bg-3: #222222;
21
+ --border: #2c2c2c;
22
+ --text: #f0f0f0;
23
+ --text-muted: #777777;
24
  }
25
 
26
  * {
27
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
28
  }
29
 
30
+ body, .gradio-container, .main, .wrap {
31
+ background-color: var(--bg-0) !important;
32
+ color: var(--text) !important;
 
33
  }
34
 
35
  .gradio-container {
 
38
  padding: 0 30px;
39
  }
40
 
41
+ footer { visibility: hidden; }
 
 
42
 
43
+ /* ── PAGE HEADER ───────────────────────────────────────────── */
44
  #page-header {
45
  text-align: center;
46
+ padding: 3rem 2rem 2rem;
47
  margin-bottom: 2rem;
48
+ border-bottom: 1px solid var(--border);
49
  }
50
 
51
  #header-container {
 
56
  margin: 0 auto;
57
  }
58
 
59
+ #left-container { flex: 0 0 auto; }
60
+ #centre-container { flex: 1; text-align: center; }
 
 
 
 
 
 
 
61
  #right-container {
62
  flex: 0 0 auto;
63
  width: 150px;
64
+ display: flex;
65
+ align-items: center;
66
+ justify-content: flex-end;
67
  }
68
 
69
  #page-header h1 {
70
  font-size: 3rem;
71
  font-weight: 700;
72
+ color: #ffffff;
73
  margin: 0 0 0.5rem 0;
74
  }
75
 
76
  #page-header p {
77
+ font-size: 1.3rem;
78
+ color: var(--text-muted);
79
  margin: 0;
80
  }
81
 
82
+ #left-container #vector-logo {
83
+ height: 80px;
84
+ width: 80px;
85
+ object-fit: contain;
86
+ filter: brightness(0) invert(1);
87
  }
88
 
89
+ #right-container #humanibench-logo {
90
+ height: 80px;
91
+ width: 80px;
92
+ object-fit: contain;
 
 
93
  }
94
 
95
+ /* ── STATS ─────────────────────────────────────────────────── */
96
  .stats-container {
97
  display: grid;
98
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
99
+ gap: 1.5rem;
100
  max-width: 1300px;
101
  margin: 2rem auto;
102
  padding: 0 1rem;
103
  }
104
 
105
  .stat-box {
106
+ background: var(--bg-2);
107
+ border: 1px solid var(--border);
108
+ border-radius: 10px;
109
  padding: 1.5rem;
110
  text-align: center;
111
+ transition: border-color 0.2s, box-shadow 0.2s;
112
  }
113
 
114
  .stat-box:hover {
115
+ border-color: var(--accent);
116
+ box-shadow: 0 0 18px var(--accent-glow);
117
  }
118
 
119
  .stat-value {
120
+ font-size: 2.5rem;
121
  font-weight: 700;
122
+ color: var(--accent);
123
+ margin-bottom: 0.4rem;
124
  }
125
 
126
  .stat-label {
127
+ font-size: 1rem;
128
+ color: var(--text-muted);
129
  font-weight: 500;
130
  }
131
 
132
+ /* ── BADGES ────────────────────────────────────────────────── */
133
  .badges-container {
134
  display: flex;
135
  justify-content: center;
 
138
  flex-wrap: wrap;
139
  }
140
 
141
+ .badges-container img { height: 22px; }
 
 
142
 
143
+ /* ── TABS ──────────────────────────────────────────────────── */
144
  .tab-nav {
145
+ border-bottom: 1px solid var(--border);
146
  margin-bottom: 2rem;
147
  }
148
 
149
  .tab-nav button {
150
+ font-size: 1rem;
151
  font-weight: 600;
152
+ padding: 0.75rem 1.5rem;
153
  border: none;
154
  background: transparent;
155
+ color: var(--text-muted);
156
+ border-bottom: 2px solid transparent;
157
  transition: all 0.2s;
158
  }
159
 
160
  .tab-nav button:hover {
161
+ color: var(--text);
162
+ background: rgba(255,255,255,0.04);
163
  }
164
 
165
  .tab-nav button[aria-selected="true"] {
166
+ color: var(--accent);
167
+ border-bottom-color: var(--accent);
168
+ background: transparent;
169
+ }
170
+
171
+ /* ── TABLE ─────────────────────────────────────────────────── */
172
+ .humani-leaderboard-table .table-wrap {
173
+ border-radius: 10px;
174
+ overflow: hidden;
175
+ border: 1px solid var(--border);
176
+ box-shadow: 0 4px 20px rgba(0,0,0,0.5);
177
  }
178
 
179
  .humani-leaderboard-table .table-wrap table.table {
180
+ font-family: inherit;
181
+ color: var(--text);
 
 
182
  width: 100%;
183
+ table-layout: auto;
184
+ border-collapse: collapse;
185
+ background: var(--bg-1);
186
  }
187
 
188
  .humani-leaderboard-table .table-wrap table.table a {
189
+ color: var(--accent);
190
+ font-weight: 600;
 
191
  text-decoration: none;
 
192
  }
193
 
194
  .humani-leaderboard-table .table-wrap table.table a:hover {
195
+ color: #ff3da5;
196
+ text-decoration: underline;
197
  }
198
 
199
+ /* Header row */
200
+ .humani-leaderboard-table .table-wrap table.table thead tr {
201
+ background: var(--bg-3);
202
+ border-bottom: 2px solid var(--accent);
203
+ }
204
+
205
+ .humani-leaderboard-table .table-wrap table.table th {
206
+ font-size: 0.78rem;
207
+ font-weight: 700;
208
+ text-transform: uppercase;
209
+ letter-spacing: 0.07em;
210
+ color: var(--text-muted) !important;
211
+ padding: 0.9rem 0.9rem;
212
  text-align: center;
213
+ border: none;
214
+ white-space: nowrap;
215
  }
216
 
217
+ .humani-leaderboard-table .table-wrap table.table th:first-child {
218
+ text-align: left;
219
+ padding-left: 1.2rem;
220
+ color: var(--text) !important;
 
221
  }
222
 
223
+ /* Body rows */
224
+ .humani-leaderboard-table .table-wrap table.table tbody tr {
225
+ border-bottom: 1px solid var(--border);
226
+ transition: background-color 0.15s;
 
227
  }
228
 
229
+ .humani-leaderboard-table .table-wrap table.table tbody tr:nth-child(even) {
230
+ background-color: var(--bg-2);
231
+ }
232
+
233
+ .humani-leaderboard-table .table-wrap table.table tbody tr:hover {
234
+ background-color: var(--accent-dim) !important;
235
+ }
236
+
237
+ .humani-leaderboard-table .table-wrap table.table tbody tr:last-child {
238
+ border-bottom: none;
239
  }
240
 
241
  .humani-leaderboard-table .table-wrap table.table tbody td {
242
+ font-size: 0.95rem;
243
  font-weight: 500;
244
+ color: var(--text);
245
+ padding: 0.75rem 0.9rem;
246
+ text-align: center;
247
+ white-space: nowrap;
248
+ border: none;
249
+ }
250
+
251
+ .humani-leaderboard-table .table-wrap table.table td:first-child {
252
+ text-align: left !important;
253
+ padding-left: 1.2rem;
254
+ font-weight: 600;
255
+ min-width: 160px;
256
  }
257
 
258
+ /* ── INFO / WARNING BOXES ──────────────────────────────────── */
259
  .info-box {
260
+ background-color: var(--accent-dim);
261
+ border-left: 3px solid var(--accent);
262
+ padding: 1.1rem 1.5rem;
263
  border-radius: 4px;
264
  margin: 1.5rem auto;
265
+ font-size: 1rem;
266
  max-width: 1400px;
267
+ color: var(--text);
268
  }
269
 
270
  .info-box h3 {
271
+ color: var(--accent);
272
  margin-top: 0;
273
+ font-size: 1.1rem;
274
  }
275
 
276
  .warning-box {
277
+ background-color: var(--accent-dim);
278
+ border-left: 3px solid var(--accent);
279
+ padding: 1.1rem 1.5rem;
280
  border-radius: 4px;
281
  margin: 1.5rem auto;
282
+ font-size: 1rem;
283
  max-width: 1400px;
284
+ color: var(--text);
285
  }
286
 
287
  .warning-box h3 {
288
+ color: var(--accent);
289
  margin-top: 0;
290
+ font-size: 1.1rem;
291
  }
292
 
293
+ /* ── MARKDOWN TEXT ─────────────────────────────────────────── */
294
  .markdown-text {
295
  line-height: 1.75;
296
+ color: var(--text);
297
  max-width: 1400px !important;
298
  margin: 0 auto;
299
+ font-size: 1rem;
300
  padding: 0 2rem;
301
  }
302
 
303
  .markdown-text h2 {
304
+ color: var(--text);
305
+ font-size: 1.8rem;
306
  margin-top: 2.5rem;
307
  margin-bottom: 1rem;
308
+ padding-bottom: 0.4rem;
309
+ border-bottom: 1px solid var(--border);
310
  }
311
 
312
  .markdown-text h3 {
313
+ color: var(--accent);
314
+ font-size: 1.3rem;
315
  margin-top: 1.75rem;
316
  margin-bottom: 0.75rem;
317
  font-weight: 600;
318
  }
319
 
320
+ .markdown-text p { margin-bottom: 1rem; color: var(--text); }
321
+ .markdown-text li { margin-bottom: 0.4rem; color: var(--text); }
 
 
322
 
323
  .markdown-text ul, .markdown-text ol {
324
  margin-bottom: 1rem;
325
  padding-left: 1.5rem;
326
  }
327
 
328
+ .markdown-text a { color: #b0b0b0; text-decoration: underline; text-decoration-color: #444; }
329
+ .markdown-text a:hover { color: var(--accent); text-decoration-color: var(--accent); }
 
 
330
 
331
  .markdown-text code {
332
+ background-color: var(--bg-3);
333
  padding: 0.2rem 0.4rem;
334
  border-radius: 3px;
335
+ font-size: 0.9em;
336
+ border: 1px solid var(--border);
337
+ color: var(--accent);
338
  }
339
 
340
  .markdown-text pre {
341
+ background-color: var(--bg-2);
342
  padding: 1rem;
343
  border-radius: 6px;
344
  overflow-x: auto;
345
+ border: 1px solid var(--border);
346
  margin: 1rem 0;
347
+ font-size: 0.9rem;
348
+ }
349
+
350
+ .markdown-text pre code {
351
+ color: var(--text-muted);
352
+ background: transparent;
353
+ border: none;
354
+ padding: 0;
355
+ font-size: inherit;
356
  }
357
 
358
  .markdown-text table {
 
363
 
364
  .markdown-text table th,
365
  .markdown-text table td {
366
+ padding: 0.65rem 0.9rem;
367
  text-align: left;
368
+ border: 1px solid var(--border);
369
+ color: var(--text);
370
  }
371
 
372
  .markdown-text table th {
373
+ background-color: var(--bg-3);
374
  font-weight: 600;
375
+ color: var(--accent);
376
  }
377
 
378
  .markdown-text table tr:nth-child(even) {
379
+ background-color: var(--bg-2);
380
  }
381
 
382
+ /* ── FOOTER ────────────────────────────────────────────────── */
383
  #footer {
384
  text-align: center;
385
  padding: 2.5rem 1rem;
386
  margin-top: 4rem;
387
+ border-top: 1px solid var(--border);
388
+ color: var(--text-muted);
389
  font-size: 1rem;
390
  }
391
 
392
+ #footer p { margin: 0.5rem 0; }
 
 
393
 
394
  #footer a {
395
+ color: var(--accent);
396
  text-decoration: none;
397
  font-weight: 600;
398
  margin: 0 0.5rem;
399
  }
400
 
401
+ #footer a:hover { color: #ff3da5; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
 
403
+ /* ── MOBILE ────────────────────────────────────────────────── */
404
  @media (max-width: 768px) {
405
+ .gradio-container { padding: 0 15px !important; }
 
 
406
 
407
+ #header-container { flex-direction: column; }
 
 
408
 
409
+ #left-container, #right-container {
 
410
  width: 100%;
411
  text-align: center;
412
+ justify-content: center;
413
  }
414
 
415
+ #left-container #vector-logo,
416
+ #right-container #humanibench-logo {
417
+ height: 60px;
418
+ width: 60px;
419
+ margin-bottom: 0.5rem;
 
 
 
 
420
  }
421
 
422
+ #page-header h1 { font-size: 1.75rem !important; }
423
+ #page-header p { font-size: 1.1rem !important; }
 
424
 
425
+ .stat-value { font-size: 2rem !important; }
 
 
426
 
427
  .stats-container {
428
  grid-template-columns: repeat(2, 1fr);