Spaces:

Elfsong
/

CodeArena

Running

File size: 7,264 Bytes

41302a5
 
 
 
 
 
8c89a6d
6a914f4
3788f63
bcc3eb3
1869ab9
3788f63
4f3d3c6
293121e
b64b933
7c2ff6d
b64b933
 
554d373
 
7c2ff6d
 
83b66cb
554d373
b64b933
 
554d373
 
 
 
 
7c2ff6d
b64b933
83b66cb
 
08dccaf
2e8cfe5
bcd8088
151c6ad
2e8cfe5
 
7368e62
 
554d373
 
 
 
bcd8088
7368e62
41302a5
7368e62
 
41302a5
 
7368e62
 
 
 
 
2b8f77a
 
7368e62
 
 
16d5f45
 
554d373
 
 
 
 
 
 
16d5f45
83b66cb
35b35ab
8bc1087
b4a4ef7
f5e6a19
feb488c
 
 
2085233
 
 
3ee227f
2085233
feb488c
 
35b35ab
 
 
dbe0366
 
7c2ff6d
90ad64b
dbe0366
35b35ab
f830b7d
60a4a0f
35b35ab
 
 
 
 
1869ab9
5501468
 
1869ab9
f08bf9c
5501468
f08bf9c
 
5501468
 
f08bf9c
 
 
 
 
 
 
 
 
 
 
 
 
35b35ab
 
206cd4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feb488c
 
 
 
206cd4a
8c50270
 
 
f830b7d
feb488c
 
 
 
 
 
87f8f5e
206cd4a
90ad64b
8c50270
 
0f54608
feb488c
90ad64b
f830b7d
feb488c
8bc1087
 
 
 
d384e97

# coding: utf-8

# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01

import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import get_dataset_config_names
from datasets import load_dataset

st.title("Code:blue[Arena]")


# Streamlit re-executes this script from top to bottom on every user
# interaction. The loaders below are cached so the datasets are fetched and
# iterated only once per server process instead of on every rerun.
@st.cache_data(show_spinner=False)
def _load_venus_problems():
    """Return the Venus (LeetCode) problems keyed by problem title.

    Each row of the ``Elfsong/leetcode_data`` train split is tagged with
    ``type == "leetcode"`` so later code can tell the two sources apart.
    """
    problems = {}
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


@st.cache_data(show_spinner=False)
def _load_apps_problems():
    """Return the APPS problems keyed by ``apps_<problem_id>``.

    Each row of the ``Elfsong/APPS_Python`` test split is tagged with
    ``type == "apps"``.
    """
    problems = {}
    for problem in load_dataset("Elfsong/APPS_Python", split='test'):
        problem['type'] = "apps"
        problems[f'apps_{problem["problem_id"]}'] = problem
    return problems


# Maps a problem key (LeetCode title or "apps_<id>") to its dataset row.
problem_dict = dict()

# Venus Data
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_venus_problems())

# APPS Data
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_apps_problems())

problem_count = len(problem_dict)


# Page routing: a `?problem=<key>` query parameter selects the single-problem
# detail view; without it the tabbed overview is rendered.
if "problem" in st.query_params:
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)

    # The key comes straight from the URL, so it may not match any loaded
    # problem. Show a readable error and end this run instead of crashing
    # with a KeyError.
    if problem_instance is None:
        st.error(f"Unknown problem: {problem_id}")
        st.stop()

    problem_type = problem_instance['type']

    st.header(problem_id)

    with st.expander("Problem Description"):
        # The two datasets store the statement under different column names.
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    with st.expander("Test Cases"):
        # `test_cases` is a JSON string holding a list of {"input", "output"} objects.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case['input'] for test_case in test_cases],
                "output": [test_case['output'] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            test_case_generator = problem_instance["test_case_generator"]
            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            # Only the first 20 lines of the generator source are shown.
            test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
            st.code(prompt+test_case_generator)
        else:
            st.code("Stay tuned!")


else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            problem_url_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problem_dict.values()],
                    "difficulty": [str(problem['difficulty']) for problem in problem_dict.values()],
                    "type": [str(problem['type']) for problem in problem_dict.values()],
                    # The link must carry exactly the key the detail view looks
                    # up in `problem_dict`. It is percent-encoded so keys with
                    # spaces or reserved characters (+, &, #) survive the URL.
                    "problem_link": [problem_url_prefix + quote(str(problem_key), safe="") for problem_key in problem_dict],
                    # Placeholder series: 20 random values per problem (no real data yet).
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problem_dict],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )

    with tab_submission:
        st.header("Submissions")
        # Each config of the evaluation dataset is offered as one selectable model.
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)

        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in ds],
                    "solution": [str(problem['solution']) for problem in ds],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                # Valid named widths are 'small', 'medium' and 'large'.
                "solution": st.column_config.TextColumn("Solution", width='large'),
            },
            height=800,
            column_order=("problem_id", "solution"),
            hide_index=True,
        )


    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b"
        ]

        # Placeholder leaderboard: every metric is zero and progress is random.
        zero_column = [0] * len(model_list)
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                "dynamic_point": list(zero_column),
                "pass@1": list(zero_column),
                "beyond@t": list(zero_column),
                "beyond@m": list(zero_column),
                # randint is inclusive on both ends, so the upper bound must be
                # problem_count to stay within the progress bar's max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )

        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
        st.write("🚧 WIP: We will update real data very soon!")