File size: 7,264 Bytes
41302a5 8c89a6d 6a914f4 3788f63 bcc3eb3 1869ab9 3788f63 4f3d3c6 293121e b64b933 7c2ff6d b64b933 554d373 7c2ff6d 83b66cb 554d373 b64b933 554d373 7c2ff6d b64b933 83b66cb 08dccaf 2e8cfe5 bcd8088 151c6ad 2e8cfe5 7368e62 554d373 bcd8088 7368e62 41302a5 7368e62 41302a5 7368e62 2b8f77a 7368e62 16d5f45 554d373 16d5f45 83b66cb 35b35ab 8bc1087 b4a4ef7 f5e6a19 feb488c 2085233 3ee227f 2085233 feb488c 35b35ab dbe0366 7c2ff6d 90ad64b dbe0366 35b35ab f830b7d 60a4a0f 35b35ab 1869ab9 5501468 1869ab9 f08bf9c 5501468 f08bf9c 5501468 f08bf9c 35b35ab 206cd4a feb488c 206cd4a 8c50270 f830b7d feb488c 87f8f5e 206cd4a 90ad64b 8c50270 0f54608 feb488c 90ad64b f830b7d feb488c 8bc1087 d384e97 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# coding: utf-8
# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01
import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import get_dataset_config_names
from datasets import load_dataset
# App title.
st.title("Code:blue[Arena]")

# Registry of every loaded problem, keyed by the id used in "?problem=<id>" links.
problem_dict = {}

# Venus (LeetCode) problems: keyed by title and tagged with type "leetcode".
with st.spinner("Loading Venus data...", show_time=True):
    venus_ds = load_dataset("Elfsong/leetcode_data", split="train")
    problem_dict.update(
        (record["title"], {**record, "type": "leetcode"}) for record in venus_ds
    )
# APPS problems: keyed as "apps_<problem_id>" and tagged with type "apps".
with st.spinner("Loading APPS data...", show_time=True):
    apps_ds = load_dataset("Elfsong/APPS_Python", split="test")
    for entry in apps_ds:
        registry_key = f'apps_{entry["problem_id"]}'
        problem_dict[registry_key] = dict(entry, type="apps")

# Total number of registered problems (used as the upper bound of the
# per-model progress bars).
problem_count = len(problem_dict)
# Page routing: "?problem=<id>" shows a single problem; otherwise the tabbed
# overview (problems / submissions / models / about) is rendered.
if "problem" in st.query_params:
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # The id comes straight from the URL, so it may not match any loaded
        # problem; report it instead of crashing with a KeyError.
        st.error(f"Unknown problem: {problem_id}")
        st.stop()
    problem_type = problem_instance["type"]

    st.header(problem_id)

    # The two datasets store the statement under different field names.
    with st.expander("Problem Description"):
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    with st.expander("Test Cases"):
        # "test_cases" is stored as a JSON string holding a list of
        # {"input": ..., "output": ...} objects.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case["input"] for test_case in test_cases],
                "output": [test_case["output"] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            test_case_generator = problem_instance["test_case_generator"]
            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            # Only the first 20 lines of the generator are shown.
            test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
            st.code(prompt + test_case_generator)
        else:
            st.code("Stay tuned!")
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(
        ["Problems", "Submissions", "Models", "About"]
    )

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            link_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem["problem_id"]) for problem in problems],
                    "difficulty": [str(problem["difficulty"]) for problem in problems],
                    "type": [str(problem["type"]) for problem in problems],
                    # The registry key is exactly the id the single-problem
                    # page looks up; percent-encode it so titles containing
                    # spaces or reserved characters still yield a valid link.
                    "problem_link": [
                        link_prefix + quote(str(key), safe="") for key in problem_dict
                    ],
                    # Placeholder: random series until real data is available.
                    "acceptance_rate": [
                        [random.randint(0, 100) for _ in range(20)] for _ in problems
                    ],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width="small"),
                    "difficulty": st.column_config.TextColumn("Difficulty", width="small"),
                    "type": st.column_config.TextColumn("Type", width="small"),
                    "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                    "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width="small"),
                },
                height=800,
                column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
                hide_index=True,
            )

    with tab_submission:
        st.header("Submissions")
        # Each config of the evaluation dataset is offered as one model.
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)
        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split="train")
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem["problem_id"]) for problem in ds],
                    "solution": [str(problem["solution"]) for problem in ds],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width="small"),
                    # "large" is the widest named column width Streamlit
                    # documents ("big" is not a valid value).
                    "solution": st.column_config.TextColumn("Solution", width="large"),
                },
                height=800,
                column_order=("problem_id", "solution"),
                hide_index=True,
            )

    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b",
        ]
        # Placeholder leaderboard: metrics are zero and progress is random
        # until real results are wired in.
        zeros = [0] * len(model_list)
        df = pd.DataFrame(
            {
                "model_name": model_list,
                "dynamic_point": zeros,
                "pass@1": zeros,
                "beyond@t": zeros,
                "beyond@m": zeros,
                # randint is inclusive at both ends, so the upper bound must be
                # problem_count itself to stay within the progress bar's range.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )
        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes dataframe column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
        st.write("🚧 WIP: We will update real data very soon!")
|