File size: 7,264 Bytes
41302a5
 
 
 
 
 
8c89a6d
6a914f4
3788f63
bcc3eb3
1869ab9
3788f63
4f3d3c6
293121e
b64b933
7c2ff6d
b64b933
 
554d373
 
7c2ff6d
 
83b66cb
554d373
b64b933
 
554d373
 
 
 
 
7c2ff6d
b64b933
83b66cb
 
08dccaf
2e8cfe5
bcd8088
151c6ad
2e8cfe5
 
7368e62
 
554d373
 
 
 
bcd8088
7368e62
41302a5
7368e62
 
41302a5
 
7368e62
 
 
 
 
2b8f77a
 
7368e62
 
 
16d5f45
 
554d373
 
 
 
 
 
 
16d5f45
83b66cb
35b35ab
8bc1087
b4a4ef7
f5e6a19
feb488c
 
 
2085233
 
 
3ee227f
2085233
feb488c
 
35b35ab
 
 
dbe0366
 
7c2ff6d
90ad64b
dbe0366
35b35ab
f830b7d
60a4a0f
35b35ab
 
 
 
 
1869ab9
5501468
 
1869ab9
f08bf9c
5501468
f08bf9c
 
5501468
 
f08bf9c
 
 
 
 
 
 
 
 
 
 
 
 
35b35ab
 
206cd4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feb488c
 
 
 
206cd4a
8c50270
 
 
f830b7d
feb488c
 
 
 
 
 
87f8f5e
206cd4a
90ad64b
8c50270
 
0f54608
feb488c
90ad64b
f830b7d
feb488c
8bc1087
 
 
 
d384e97
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# coding: utf-8

# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01

import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import get_dataset_config_names
from datasets import load_dataset

st.title("Code:blue[Arena]")


# Streamlit re-executes this script on every interaction, so the dataset
# loading is wrapped in cached functions: the download and indexing work is
# done once and later reruns reuse the stored result.
@st.cache_data(show_spinner=False)
def _load_venus_problems():
    """Return the Venus (LeetCode) problems keyed by their title.

    Each row of the "Elfsong/leetcode_data" train split is tagged with
    ``type == "leetcode"`` before being stored.
    """
    problems = {}
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


@st.cache_data(show_spinner=False)
def _load_apps_problems():
    """Return the APPS problems keyed by ``apps_<problem_id>``.

    Each row of the "Elfsong/APPS_Python" test split is tagged with
    ``type == "apps"`` before being stored.
    """
    problems = {}
    for problem in load_dataset("Elfsong/APPS_Python", split='test'):
        problem['type'] = "apps"
        problems[f'apps_{problem["problem_id"]}'] = problem
    return problems


# Maps problem key -> problem row; Venus entries are inserted before APPS ones.
problem_dict = dict()

# Venus Data
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_venus_problems())

# APPS Data
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_apps_problems())

problem_count = len(problem_dict)


if "problem" in st.query_params:
    # The query parameter carries the same key that indexes problem_dict.
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # An unknown or stale ?problem= value would otherwise raise KeyError
        # and show a traceback; report it and halt this run instead.
        st.error(f"Unknown problem: {problem_id}")
        st.stop()
    problem_type = problem_instance['type']

    st.header(problem_id)
    
    with st.expander("Problem Description"):
        # The statement lives under a different field depending on the
        # problem type; any other type renders nothing.
        description_field = {
            "leetcode": "question_content",
            "apps": "problem_content",
        }.get(problem_type)
        if description_field is not None:
            st.markdown(problem_instance[description_field])

    with st.expander("Test Cases"):
        # "test_cases" holds a JSON string; every decoded entry is read for
        # its 'input' and 'output' values.
        parsed_cases = json.loads(problem_instance["test_cases"])
        case_inputs = []
        case_outputs = []
        for case in parsed_cases:
            case_inputs.append(case['input'])
            case_outputs.append(case['output'])

        cases_df = pd.DataFrame({"input": case_inputs, "output": case_outputs})
        text_columns = {
            "input": st.column_config.TextColumn("Input"),
            "output": st.column_config.TextColumn("Output"),
        }
        st.dataframe(
            cases_df,
            column_config=text_columns,
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type != "leetcode":
            st.code("Stay tuned!")
        else:
            # Show a notice followed by the first 20 lines of the generator.
            notice = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            generator_lines = problem_instance["test_case_generator"].split("\n")
            preview = "\n".join(generator_lines[:20])
            st.code(notice + preview)

    
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    # Problems tab: one table row per loaded problem.
    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            link_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problems],
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    # Link with the exact problem_dict key so the detail view
                    # can find the entry, and percent-encode it so characters
                    # such as spaces, '&', '#' or '+' survive the query string.
                    "problem_link": [link_prefix + quote(str(key), safe="") for key in problem_dict],
                    # Placeholder data: 20 random points per problem.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )

    # Submissions tab: pick a model configuration and list its solutions.
    with tab_submission:
        st.header("Submissions")
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)

        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
            # Walk the dataset once and collect both columns together.
            submission_ids = []
            submission_solutions = []
            for submission in ds:
                submission_ids.append(int(submission['problem_id']))
                submission_solutions.append(str(submission['solution']))
            df = pd.DataFrame(
                {
                    "problem_id": submission_ids,
                    "solution": submission_solutions,
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                # 'large' is the widest named width; 'big' is not a documented value.
                "solution": st.column_config.TextColumn("Solution", width='large'),
            },
            height=800,
            column_order=("problem_id", "solution"),
            hide_index=True,
        )


    # Models tab: leaderboard table with placeholder scores.
    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b"
        ]

        model_count = len(model_list)
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                # All score columns are zero-filled placeholders.
                "dynamic_point": [0] * model_count,
                "pass@1": [0] * model_count,
                "beyond@t": [0] * model_count,
                "beyond@m": [0] * model_count,
                # randint is inclusive on both ends, so the upper bound must be
                # problem_count to stay within the progress bar's max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )

        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        # Static text for the About tab, written in display order.
        about_messages = (
            "Hello World!",
            "This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.",
            "🚧 WIP: We will update real data very soon!",
        )
        for message in about_messages:
            st.write(message)