Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +6 -6
src/streamlit_app.py
CHANGED
|
@@ -39,7 +39,7 @@ st.markdown(
|
|
| 39 |
</p>
|
| 40 |
<p style="font-size:20px;">
|
| 41 |
📄 <a href="https://arxiv.org/abs/2506.01241">Paper</a> | 💻 <a href="https://github.com/launchnlp/ExpertLongBench">GitHub</a> | 🤗 <a href="https://huggingface.co/datasets/launch/ExpertLongBench">Public Dataset</a> |
|
| 42 |
-
⚙️ <strong>Version</strong>: <strong>V1</strong> | <strong># Models</strong>:
|
| 43 |
</p>
|
| 44 |
</div>
|
| 45 |
''',
|
|
@@ -50,7 +50,7 @@ st.markdown(
|
|
| 50 |
@st.cache_data
|
| 51 |
def load_data(path):
|
| 52 |
df = pd.read_json(path, lines=True)
|
| 53 |
-
score_cols = [f"T{i}" for i in range(1,
|
| 54 |
df["Avg"] = df[score_cols].mean(axis=1).round(1)
|
| 55 |
# df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
|
| 56 |
# Compute rank per column (1 = best)
|
|
@@ -69,11 +69,11 @@ if selected_tier == 'F1':
|
|
| 69 |
df = load_data("src/models.json")
|
| 70 |
|
| 71 |
# Precompute max ranks for color scaling
|
| 72 |
-
score_cols = [f"T{i}" for i in range(1,
|
| 73 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
| 74 |
|
| 75 |
# Build raw HTML table
|
| 76 |
-
cols = ["Model"] + [f"T{i}" for i in range(1,
|
| 77 |
html = "<table style='border-collapse:collapse; width:100%; font-size:14px;'>"
|
| 78 |
# header
|
| 79 |
html += "<tr>" + "".join(f"<th style='padding:6px;'>{col}</th>" for col in cols) + "</tr>"
|
|
@@ -104,11 +104,11 @@ else:
|
|
| 104 |
df2 = load_data("src/model_acc.json")
|
| 105 |
|
| 106 |
# Precompute max ranks for color scaling
|
| 107 |
-
score_cols = [f"T{i}" for i in range(1,
|
| 108 |
max_ranks = {col: df2[f"{col}_rank"].max() for col in score_cols}
|
| 109 |
|
| 110 |
# Build raw HTML table
|
| 111 |
-
cols = ["Model"] + [f"T{i}" for i in range(1,
|
| 112 |
html = "<table style='border-collapse:collapse; width:100%; font-size:14px;'>"
|
| 113 |
# header
|
| 114 |
html += "<tr>" + "".join(f"<th style='padding:6px;'>{col}</th>" for col in cols) + "</tr>"
|
|
|
|
| 39 |
</p>
|
| 40 |
<p style="font-size:20px;">
|
| 41 |
📄 <a href="https://arxiv.org/abs/2506.01241">Paper</a> | 💻 <a href="https://github.com/launchnlp/ExpertLongBench">GitHub</a> | 🤗 <a href="https://huggingface.co/datasets/launch/ExpertLongBench">Public Dataset</a> |
|
| 42 |
+
⚙️ <strong>Version</strong>: <strong>V1</strong> | <strong># Models</strong>: 13 | Updated: <strong>Sept 2025</strong>
|
| 43 |
</p>
|
| 44 |
</div>
|
| 45 |
''',
|
|
|
|
| 50 |
@st.cache_data
|
| 51 |
def load_data(path):
|
| 52 |
df = pd.read_json(path, lines=True)
|
| 53 |
+
score_cols = [f"T{i}" for i in range(1, 13)]
|
| 54 |
df["Avg"] = df[score_cols].mean(axis=1).round(1)
|
| 55 |
# df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
|
| 56 |
# Compute rank per column (1 = best)
|
|
|
|
| 69 |
df = load_data("src/models.json")
|
| 70 |
|
| 71 |
# Precompute max ranks for color scaling
|
| 72 |
+
score_cols = [f"T{i}" for i in range(1, 13)] + ["Avg"]
|
| 73 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
| 74 |
|
| 75 |
# Build raw HTML table
|
| 76 |
+
cols = ["Model"] + [f"T{i}" for i in range(1,13)] + ["Avg"]
|
| 77 |
html = "<table style='border-collapse:collapse; width:100%; font-size:14px;'>"
|
| 78 |
# header
|
| 79 |
html += "<tr>" + "".join(f"<th style='padding:6px;'>{col}</th>" for col in cols) + "</tr>"
|
|
|
|
| 104 |
df2 = load_data("src/model_acc.json")
|
| 105 |
|
| 106 |
# Precompute max ranks for color scaling
|
| 107 |
+
score_cols = [f"T{i}" for i in range(1, 13)] + ["Avg"]
|
| 108 |
max_ranks = {col: df2[f"{col}_rank"].max() for col in score_cols}
|
| 109 |
|
| 110 |
# Build raw HTML table
|
| 111 |
+
cols = ["Model"] + [f"T{i}" for i in range(1,13)] + ["Avg"]
|
| 112 |
html = "<table style='border-collapse:collapse; width:100%; font-size:14px;'>"
|
| 113 |
# header
|
| 114 |
html += "<tr>" + "".join(f"<th style='padding:6px;'>{col}</th>" for col in cols) + "</tr>"
|