Spaces:
Restarting
Restarting
Update space
Browse files- .idea/.gitignore +8 -0
- .idea/Leaderboard.iml +12 -0
- .idea/inspectionProfiles/Project_Default.xml +12 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- all_dimensions/Audience.jsonl +10 -0
- all_dimensions/Format.jsonl +10 -0
- all_dimensions/Keyword.jsonl +10 -0
- all_dimensions/Language.jsonl +10 -0
- all_dimensions/Length.jsonl +10 -0
- all_dimensions/Source.jsonl +10 -0
- app.py +55 -188
- utils.py +147 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
4 |
+
# Editor-based HTTP Client requests
|
5 |
+
/httpRequests/
|
6 |
+
# Datasource local storage ignored files
|
7 |
+
/dataSources/
|
8 |
+
/dataSources.local.xml
|
.idea/Leaderboard.iml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="inheritedJdk" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
<component name="PyDocumentationSettings">
|
9 |
+
<option name="format" value="PLAIN" />
|
10 |
+
<option name="myDocStringFormat" value="Plain" />
|
11 |
+
</component>
|
12 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
5 |
+
<option name="ignoredIdentifiers">
|
6 |
+
<list>
|
7 |
+
<option value="db_file" />
|
8 |
+
</list>
|
9 |
+
</option>
|
10 |
+
</inspection_tool>
|
11 |
+
</profile>
|
12 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="Black">
|
4 |
+
<option name="sdkName" value="leaderboard" />
|
5 |
+
</component>
|
6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="leaderboard" project-jdk-type="Python SDK" />
|
7 |
+
</project>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/Leaderboard.iml" filepath="$PROJECT_DIR$/.idea/Leaderboard.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="" vcs="Git" />
|
5 |
+
</component>
|
6 |
+
</project>
|
all_dimensions/Audience.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.99, "SICR": 0.94, "nDCG@10(Original)": 0.89, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.97, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.09}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.84, "SICR": 0.21, "nDCG@10(Original)": 0.5, "nDCG@10(Instructed)": 0.68, "nDCG@10(Reversely Instructed)": 0.86, "MRR@1(Original)": 0.79, "MRR@1(Instructed)": 0.91, "MRR@1(Reversely Instructed)": 0.12}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.71, "SICR": 0.49, "nDCG@10(Original)": 0.74, "nDCG@10(Instructed)": 0.35, "nDCG@10(Reversely Instructed)": 0.56, "MRR@1(Original)": 0.34, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.27}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.52, "SICR": 0.99, "nDCG@10(Original)": 0.08, "nDCG@10(Instructed)": 0.2, "nDCG@10(Reversely Instructed)": 0.55, "MRR@1(Original)": 0.09, "MRR@1(Instructed)": 0.58, "MRR@1(Reversely Instructed)": 0.59}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.39, "SICR": 0.61, "nDCG@10(Original)": 0.21, "nDCG@10(Instructed)": 0.46, "nDCG@10(Reversely Instructed)": 0.13, "MRR@1(Original)": 0.35, "MRR@1(Instructed)": 0.8, "MRR@1(Reversely Instructed)": 0.1}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.35, "SICR": 0.82, "nDCG@10(Original)": 0.42, "nDCG@10(Instructed)": 0.92, "nDCG@10(Reversely Instructed)": 0.17, "MRR@1(Original)": 0.39, "MRR@1(Instructed)": 0.24, "MRR@1(Reversely Instructed)": 0.32}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.26, "SICR": 0.17, "nDCG@10(Original)": 0.68, "nDCG@10(Instructed)": 0.73, "nDCG@10(Reversely Instructed)": 0.58, "MRR@1(Original)": 0.13, "MRR@1(Instructed)": 0.62, "MRR@1(Reversely Instructed)": 0.9}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.22, "SICR": 0.69, "nDCG@10(Original)": 0.44, "nDCG@10(Instructed)": 0.25, "nDCG@10(Reversely Instructed)": 0.99, "MRR@1(Original)": 0.41, "MRR@1(Instructed)": 0.35, "MRR@1(Reversely Instructed)": 0.16}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.13, "SICR": 0.13, "nDCG@10(Original)": 0.71, "nDCG@10(Instructed)": 0.85, "nDCG@10(Reversely Instructed)": 0.09, "MRR@1(Original)": 0.08, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.09}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.01, "SICR": 0.65, "nDCG@10(Original)": 0.01, "nDCG@10(Instructed)": 0.35, "nDCG@10(Reversely Instructed)": 0.82, "MRR@1(Original)": 0.69, "MRR@1(Instructed)": 0.94, "MRR@1(Reversely Instructed)": 0.96}
|
all_dimensions/Format.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.92, "SICR": 0.33, "nDCG@10(Original)": 0.02, "nDCG@10(Instructed)": 0.23, "nDCG@10(Reversely Instructed)": 0.65, "MRR@1(Original)": 0.1, "MRR@1(Instructed)": 0.1, "MRR@1(Reversely Instructed)": 0.51}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.88, "SICR": 0.35, "nDCG@10(Original)": 0.69, "nDCG@10(Instructed)": 0.48, "nDCG@10(Reversely Instructed)": 0.25, "MRR@1(Original)": 0.3, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.03}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.81, "SICR": 0.32, "nDCG@10(Original)": 0.22, "nDCG@10(Instructed)": 0.67, "nDCG@10(Reversely Instructed)": 0.79, "MRR@1(Original)": 0.59, "MRR@1(Instructed)": 0.97, "MRR@1(Reversely Instructed)": 0.05}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.7, "SICR": 0.69, "nDCG@10(Original)": 0.82, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.08, "MRR@1(Original)": 0.57, "MRR@1(Instructed)": 0.54, "MRR@1(Reversely Instructed)": 0.34}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.67, "SICR": 0.04, "nDCG@10(Original)": 0.61, "nDCG@10(Instructed)": 0.28, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.85, "MRR@1(Reversely Instructed)": 0.09}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.56, "SICR": 0.86, "nDCG@10(Original)": 0.98, "nDCG@10(Instructed)": 0.22, "nDCG@10(Reversely Instructed)": 0.95, "MRR@1(Original)": 0.69, "MRR@1(Instructed)": 0.63, "MRR@1(Reversely Instructed)": 0.22}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.33, "SICR": 0.92, "nDCG@10(Original)": 0.38, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.09, "MRR@1(Original)": 0.26, "MRR@1(Instructed)": 0.89, "MRR@1(Reversely Instructed)": 0.79}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.21, "SICR": 0.82, "nDCG@10(Original)": 0.85, "nDCG@10(Instructed)": 0.97, "nDCG@10(Reversely Instructed)": 0.69, "MRR@1(Original)": 0.81, "MRR@1(Instructed)": 0.49, "MRR@1(Reversely Instructed)": 0.43}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.15, "SICR": 0.94, "nDCG@10(Original)": 0.02, "nDCG@10(Instructed)": 0.9, "nDCG@10(Reversely Instructed)": 0.95, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.32, "MRR@1(Reversely Instructed)": 0.12}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.11, "SICR": 0.06, "nDCG@10(Original)": 0.09, "nDCG@10(Instructed)": 0.72, "nDCG@10(Reversely Instructed)": 0.61, "MRR@1(Original)": 0.16, "MRR@1(Instructed)": 0.29, "MRR@1(Reversely Instructed)": 0.61}
|
all_dimensions/Keyword.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.83, "SICR": 0.8, "nDCG@10(Original)": 0.61, "nDCG@10(Instructed)": 0.19, "nDCG@10(Reversely Instructed)": 0.46, "MRR@1(Original)": 0.13, "MRR@1(Instructed)": 0.37, "MRR@1(Reversely Instructed)": 0.97}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.78, "SICR": 0.56, "nDCG@10(Original)": 0.73, "nDCG@10(Instructed)": 0.49, "nDCG@10(Reversely Instructed)": 0.34, "MRR@1(Original)": 0.18, "MRR@1(Instructed)": 0.2, "MRR@1(Reversely Instructed)": 0.52}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.76, "SICR": 0.12, "nDCG@10(Original)": 0.05, "nDCG@10(Instructed)": 0.24, "nDCG@10(Reversely Instructed)": 0.89, "MRR@1(Original)": 0.03, "MRR@1(Instructed)": 0.99, "MRR@1(Reversely Instructed)": 0.45}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.73, "SICR": 0.35, "nDCG@10(Original)": 0.31, "nDCG@10(Instructed)": 0.18, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.01, "MRR@1(Instructed)": 0.26, "MRR@1(Reversely Instructed)": 0.73}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.62, "SICR": 0.58, "nDCG@10(Original)": 0.66, "nDCG@10(Instructed)": 0.58, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.64, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.68}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.54, "SICR": 0.42, "nDCG@10(Original)": 0.34, "nDCG@10(Instructed)": 0.03, "nDCG@10(Reversely Instructed)": 0.88, "MRR@1(Original)": 0.99, "MRR@1(Instructed)": 0.27, "MRR@1(Reversely Instructed)": 0.37}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.47, "SICR": 0.43, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.61, "nDCG@10(Reversely Instructed)": 0.49, "MRR@1(Original)": 0.23, "MRR@1(Instructed)": 0.94, "MRR@1(Reversely Instructed)": 0.41}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.34, "SICR": 0.49, "nDCG@10(Original)": 0.15, "nDCG@10(Instructed)": 0.98, "nDCG@10(Reversely Instructed)": 0.9, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.29, "MRR@1(Reversely Instructed)": 0.73}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.14, "SICR": 0.92, "nDCG@10(Original)": 0.39, "nDCG@10(Instructed)": 0.1, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.35, "MRR@1(Instructed)": 0.36, "MRR@1(Reversely Instructed)": 1.0}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.06, "SICR": 0.16, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.44, "nDCG@10(Reversely Instructed)": 0.53, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.64, "MRR@1(Reversely Instructed)": 0.14}
|
all_dimensions/Language.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.74, "SICR": 0.41, "nDCG@10(Original)": 0.97, "nDCG@10(Instructed)": 0.43, "nDCG@10(Reversely Instructed)": 0.2, "MRR@1(Original)": 0.45, "MRR@1(Instructed)": 0.41, "MRR@1(Reversely Instructed)": 0.88}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.65, "SICR": 0.88, "nDCG@10(Original)": 0.75, "nDCG@10(Instructed)": 0.14, "nDCG@10(Reversely Instructed)": 0.43, "MRR@1(Original)": 0.05, "MRR@1(Instructed)": 0.75, "MRR@1(Reversely Instructed)": 0.12}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.65, "SICR": 0.25, "nDCG@10(Original)": 0.95, "nDCG@10(Instructed)": 0.14, "nDCG@10(Reversely Instructed)": 0.94, "MRR@1(Original)": 0.81, "MRR@1(Instructed)": 0.37, "MRR@1(Reversely Instructed)": 0.28}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.48, "SICR": 0.38, "nDCG@10(Original)": 0.37, "nDCG@10(Instructed)": 0.32, "nDCG@10(Reversely Instructed)": 0.29, "MRR@1(Original)": 0.26, "MRR@1(Instructed)": 0.51, "MRR@1(Reversely Instructed)": 0.84}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.24, "SICR": 0.26, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.89, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.73, "MRR@1(Instructed)": 0.84, "MRR@1(Reversely Instructed)": 0.79}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.22, "SICR": 0.77, "nDCG@10(Original)": 0.71, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.21, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.82, "MRR@1(Reversely Instructed)": 0.2}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.18, "SICR": 0.25, "nDCG@10(Original)": 0.53, "nDCG@10(Instructed)": 0.64, "nDCG@10(Reversely Instructed)": 0.58, "MRR@1(Original)": 0.32, "MRR@1(Instructed)": 0.41, "MRR@1(Reversely Instructed)": 0.15}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.09, "SICR": 0.43, "nDCG@10(Original)": 0.82, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.43, "MRR@1(Original)": 0.03, "MRR@1(Instructed)": 0.53, "MRR@1(Reversely Instructed)": 0.83}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.07, "SICR": 0.14, "nDCG@10(Original)": 0.17, "nDCG@10(Instructed)": 0.86, "nDCG@10(Reversely Instructed)": 0.9, "MRR@1(Original)": 0.93, "MRR@1(Instructed)": 0.99, "MRR@1(Reversely Instructed)": 0.96}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.05, "SICR": 0.29, "nDCG@10(Original)": 0.45, "nDCG@10(Instructed)": 0.74, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.23, "MRR@1(Instructed)": 0.65, "MRR@1(Reversely Instructed)": 0.46}
|
all_dimensions/Length.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.98, "SICR": 0.77, "nDCG@10(Original)": 0.28, "nDCG@10(Instructed)": 0.09, "nDCG@10(Reversely Instructed)": 0.32, "MRR@1(Original)": 0.25, "MRR@1(Instructed)": 0.22, "MRR@1(Reversely Instructed)": 0.81}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.97, "SICR": 0.51, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.41, "nDCG@10(Reversely Instructed)": 0.4, "MRR@1(Original)": 0.21, "MRR@1(Instructed)": 0.14, "MRR@1(Reversely Instructed)": 0.9}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.95, "SICR": 0.8, "nDCG@10(Original)": 0.72, "nDCG@10(Instructed)": 0.32, "nDCG@10(Reversely Instructed)": 0.08, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.05, "MRR@1(Reversely Instructed)": 0.3}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.89, "SICR": 0.35, "nDCG@10(Original)": 0.65, "nDCG@10(Instructed)": 0.19, "nDCG@10(Reversely Instructed)": 0.88, "MRR@1(Original)": 0.06, "MRR@1(Instructed)": 0.9, "MRR@1(Reversely Instructed)": 0.02}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.78, "SICR": 0.18, "nDCG@10(Original)": 0.28, "nDCG@10(Instructed)": 0.89, "nDCG@10(Reversely Instructed)": 0.34, "MRR@1(Original)": 0.8, "MRR@1(Instructed)": 0.49, "MRR@1(Reversely Instructed)": 0.01}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.77, "SICR": 0.46, "nDCG@10(Original)": 0.98, "nDCG@10(Instructed)": 0.99, "nDCG@10(Reversely Instructed)": 0.19, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.11, "MRR@1(Reversely Instructed)": 0.36}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.75, "SICR": 0.88, "nDCG@10(Original)": 0.07, "nDCG@10(Instructed)": 0.75, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.57, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.38}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.61, "SICR": 0.27, "nDCG@10(Original)": 0.1, "nDCG@10(Instructed)": 0.11, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.3, "MRR@1(Instructed)": 0.96, "MRR@1(Reversely Instructed)": 0.58}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.2, "SICR": 0.73, "nDCG@10(Original)": 0.68, "nDCG@10(Instructed)": 0.01, "nDCG@10(Reversely Instructed)": 0.12, "MRR@1(Original)": 0.7, "MRR@1(Instructed)": 0.62, "MRR@1(Reversely Instructed)": 0.79}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.08, "SICR": 0.21, "nDCG@10(Original)": 0.92, "nDCG@10(Instructed)": 0.91, "nDCG@10(Reversely Instructed)": 0.81, "MRR@1(Original)": 0.16, "MRR@1(Instructed)": 0.01, "MRR@1(Reversely Instructed)": 0.22}
|
all_dimensions/Source.jsonl
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.84, "SICR": 0.72, "nDCG@10(Original)": 0.31, "nDCG@10(Instructed)": 0.18, "nDCG@10(Reversely Instructed)": 0.75, "MRR@1(Original)": 0.48, "MRR@1(Instructed)": 0.98, "MRR@1(Reversely Instructed)": 0.86}
|
2 |
+
{"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.69, "SICR": 0.62, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.5, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.67, "MRR@1(Reversely Instructed)": 0.81}
|
3 |
+
{"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.61, "SICR": 0.84, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.03, "MRR@1(Original)": 0.24, "MRR@1(Instructed)": 0.59, "MRR@1(Reversely Instructed)": 0.15}
|
4 |
+
{"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.6, "SICR": 0.86, "nDCG@10(Original)": 0.46, "nDCG@10(Instructed)": 0.8, "nDCG@10(Reversely Instructed)": 0.21, "MRR@1(Original)": 0.55, "MRR@1(Instructed)": 0.26, "MRR@1(Reversely Instructed)": 0.42}
|
5 |
+
{"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.55, "SICR": 0.73, "nDCG@10(Original)": 0.23, "nDCG@10(Instructed)": 0.56, "nDCG@10(Reversely Instructed)": 0.47, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.23, "MRR@1(Reversely Instructed)": 0.83}
|
6 |
+
{"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.48, "SICR": 0.1, "nDCG@10(Original)": 0.12, "nDCG@10(Instructed)": 0.56, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.67, "MRR@1(Instructed)": 0.66, "MRR@1(Reversely Instructed)": 0.02}
|
7 |
+
{"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.32, "SICR": 0.77, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.83, "nDCG@10(Reversely Instructed)": 0.13, "MRR@1(Original)": 0.14, "MRR@1(Instructed)": 0.0, "MRR@1(Reversely Instructed)": 0.08}
|
8 |
+
{"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.29, "SICR": 0.42, "nDCG@10(Original)": 0.45, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.7, "MRR@1(Original)": 0.66, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.44}
|
9 |
+
{"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.17, "SICR": 0.91, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.7, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.88, "MRR@1(Reversely Instructed)": 0.55}
|
10 |
+
{"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.13, "SICR": 0.44, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.93, "nDCG@10(Reversely Instructed)": 0.28, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.43, "MRR@1(Reversely Instructed)": 0.44}
|
app.py
CHANGED
@@ -2,201 +2,68 @@ import gradio as gr
|
|
2 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
3 |
import pandas as pd
|
4 |
from apscheduler.schedulers.background import BackgroundScheduler
|
5 |
-
|
6 |
-
|
7 |
-
from src.about import (
|
8 |
-
CITATION_BUTTON_LABEL,
|
9 |
-
CITATION_BUTTON_TEXT,
|
10 |
-
EVALUATION_QUEUE_TEXT,
|
11 |
-
INTRODUCTION_TEXT,
|
12 |
-
LLM_BENCHMARKS_TEXT,
|
13 |
-
TITLE,
|
14 |
-
)
|
15 |
-
from src.display.css_html_js import custom_css
|
16 |
-
from src.display.utils import (
|
17 |
-
BENCHMARK_COLS,
|
18 |
-
COLS,
|
19 |
-
EVAL_COLS,
|
20 |
-
EVAL_TYPES,
|
21 |
-
AutoEvalColumn,
|
22 |
-
ModelType,
|
23 |
-
fields,
|
24 |
-
WeightType,
|
25 |
-
Precision
|
26 |
-
)
|
27 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
|
28 |
-
|
29 |
-
from
|
30 |
|
31 |
|
32 |
def restart_space():
|
33 |
API.restart_space(repo_id=REPO_ID)
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
)
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
)
|
86 |
-
],
|
87 |
-
bool_checkboxgroup_label="Hide models",
|
88 |
-
interactive=False,
|
89 |
-
)
|
90 |
-
|
91 |
-
|
92 |
-
demo = gr.Blocks(css=custom_css)
|
93 |
-
with demo:
|
94 |
-
gr.HTML(TITLE)
|
95 |
-
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
96 |
-
|
97 |
-
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
98 |
-
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
99 |
-
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
100 |
-
|
101 |
-
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
102 |
-
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
103 |
-
|
104 |
-
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
105 |
-
with gr.Column():
|
106 |
-
with gr.Row():
|
107 |
-
gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
108 |
-
|
109 |
-
with gr.Column():
|
110 |
-
with gr.Accordion(
|
111 |
-
f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|
112 |
-
open=False,
|
113 |
-
):
|
114 |
-
with gr.Row():
|
115 |
-
finished_eval_table = gr.components.Dataframe(
|
116 |
-
value=finished_eval_queue_df,
|
117 |
-
headers=EVAL_COLS,
|
118 |
-
datatype=EVAL_TYPES,
|
119 |
-
row_count=5,
|
120 |
-
)
|
121 |
-
with gr.Accordion(
|
122 |
-
f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
|
123 |
-
open=False,
|
124 |
-
):
|
125 |
-
with gr.Row():
|
126 |
-
running_eval_table = gr.components.Dataframe(
|
127 |
-
value=running_eval_queue_df,
|
128 |
-
headers=EVAL_COLS,
|
129 |
-
datatype=EVAL_TYPES,
|
130 |
-
row_count=5,
|
131 |
-
)
|
132 |
-
|
133 |
-
with gr.Accordion(
|
134 |
-
f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
|
135 |
-
open=False,
|
136 |
-
):
|
137 |
-
with gr.Row():
|
138 |
-
pending_eval_table = gr.components.Dataframe(
|
139 |
-
value=pending_eval_queue_df,
|
140 |
-
headers=EVAL_COLS,
|
141 |
-
datatype=EVAL_TYPES,
|
142 |
-
row_count=5,
|
143 |
-
)
|
144 |
-
with gr.Row():
|
145 |
-
gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
|
146 |
-
|
147 |
-
with gr.Row():
|
148 |
-
with gr.Column():
|
149 |
-
model_name_textbox = gr.Textbox(label="Model name")
|
150 |
-
revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
|
151 |
-
model_type = gr.Dropdown(
|
152 |
-
choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
|
153 |
-
label="Model type",
|
154 |
-
multiselect=False,
|
155 |
-
value=None,
|
156 |
-
interactive=True,
|
157 |
-
)
|
158 |
-
|
159 |
-
with gr.Column():
|
160 |
-
precision = gr.Dropdown(
|
161 |
-
choices=[i.value.name for i in Precision if i != Precision.Unknown],
|
162 |
-
label="Precision",
|
163 |
-
multiselect=False,
|
164 |
-
value="float16",
|
165 |
-
interactive=True,
|
166 |
-
)
|
167 |
-
weight_type = gr.Dropdown(
|
168 |
-
choices=[i.value.name for i in WeightType],
|
169 |
-
label="Weights type",
|
170 |
-
multiselect=False,
|
171 |
-
value="Original",
|
172 |
-
interactive=True,
|
173 |
-
)
|
174 |
-
base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
|
175 |
-
|
176 |
-
submit_button = gr.Button("Submit Eval")
|
177 |
submission_result = gr.Markdown()
|
178 |
-
submit_button.click(
|
179 |
-
add_new_eval,
|
180 |
-
[
|
181 |
-
model_name_textbox,
|
182 |
-
base_model_name_textbox,
|
183 |
-
revision_name_textbox,
|
184 |
-
precision,
|
185 |
-
weight_type,
|
186 |
-
model_type,
|
187 |
-
],
|
188 |
-
submission_result,
|
189 |
-
)
|
190 |
-
|
191 |
-
with gr.Row():
|
192 |
-
with gr.Accordion("📙 Citation", open=False):
|
193 |
-
citation_button = gr.Textbox(
|
194 |
-
value=CITATION_BUTTON_TEXT,
|
195 |
-
label=CITATION_BUTTON_LABEL,
|
196 |
-
lines=20,
|
197 |
-
elem_id="citation-button",
|
198 |
-
show_copy_button=True,
|
199 |
-
)
|
200 |
|
201 |
scheduler = BackgroundScheduler()
|
202 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
|
|
2 |
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
3 |
import pandas as pd
|
4 |
from apscheduler.schedulers.background import BackgroundScheduler
|
5 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
|
7 |
+
|
8 |
+
from utils import get_data, submit
|
9 |
|
10 |
|
11 |
def restart_space() -> None:
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    API.restart_space(repo_id=REPO_ID)
|
13 |
|
14 |
+
|
15 |
+
# Leaderboard dimensions; each one is shown in its own tab.
dimensions = ['Audience', 'Keyword', 'Format', 'Language', 'Length', 'Source']

# Column headers of every leaderboard table, in display order.
display_columns = [
    "Rank",
    "Model",
    "WISE",
    "SICR",
    "nDCG@10(Original)",
    "nDCG@10(Instructed)",
    "nDCG@10(Reversely Instructed)",
    "MRR@1(Original)",
    "MRR@1(Instructed)",
    "MRR@1(Reversely Instructed)",
]

# One datatype per column: "Model" is rendered as markdown, every other
# column is numeric.
data_type = ["markdown" if column == "Model" else "number" for column in display_columns]

# Custom stylesheet for the leaderboard tables.
css = """
table > thead {
    white-space: normal
}

table {
    --cell-width-1: 250px
}

table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}

.filter-checkbox-group {
    max-width: max-content;
}

.fixed-height-table {
    height: 100px;
    overflow-y: scroll;
}

"""
|
48 |
+
|
49 |
+
# Create the Gradio interface.
# The module-level `css` string is passed to gr.Blocks; without it the
# "fixed-height-table" class attached to the tables below has no styling.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🤗 InfoSearch Benchmark Leaderboard")
    with gr.Tabs() as tabs:
        with gr.TabItem("🏅 InfoSearch Benchmark"):
            # One nested tab per dimension, each showing that dimension's table.
            for dimension in dimensions:
                with gr.Tab(dimension):
                    data = get_data(dimension)
                    gr.Dataframe(
                        data,
                        headers=display_columns,
                        datatype=data_type,
                        interactive=False,
                        elem_classes=["fixed-height-table"],
                    )
        with gr.TabItem("🚀 Submit here!"):
            gr.Markdown("✉️✨ Submit your model here.")
            # Extensions need a leading dot in `file_types`; a bare "json"
            # is not one of the recognised type names.
            file_upload = gr.File(label="Upload your JSON file", file_types=[".json"])
            submit_button = gr.Button("Submit")
            submission_result = gr.Markdown()
            # `submit` receives the uploaded file and returns the status text
            # rendered in `submission_result`.
            submit_button.click(submit, file_upload, submission_result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# Register `restart_space` as a background job that fires every 1800 seconds.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
|
utils.py
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import random
|
3 |
+
import json
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
dimensions = ['Audience', 'Keyword', 'Format', 'Language', 'Length', 'Source']
|
7 |
+
|
8 |
+
|
9 |
+
def make_clickable_model(model_name, link):
    """Return an HTML anchor labelled ``model_name`` that opens ``link`` in a new tab.

    Both values are HTML-escaped before interpolation because they can come
    from user-uploaded JSON (see ``submit``); unescaped text could otherwise
    break out of the ``href`` attribute or inject markup into the table.

    Args:
        model_name: Text shown as the link label.
        link: Target URL placed in the ``href`` attribute.

    Returns:
        The anchor element as a string.
    """
    import html

    safe_link = html.escape(str(link), quote=True)
    safe_name = html.escape(str(model_name), quote=True)
    return f'<a target="_blank" style="text-decoration: underline" href="{safe_link}">{safe_name}</a>'
|
11 |
+
|
12 |
+
|
13 |
+
def rerank():
    """Re-sort every dimension's leaderboard file and renumber its ranks.

    For each name in ``dimensions`` the file ``all_dimensions/<name>.jsonl``
    is read, its rows are sorted by ``(WISE, SICR)`` in descending order, and
    the file is rewritten with ``Rank`` (starting at 1) as the first key of
    every row.

    Raises:
        FileNotFoundError: If a dimension file does not exist.
        KeyError: If a row lacks ``WISE`` or ``SICR``.
    """
    for dimension in dimensions:
        path = f"all_dimensions/{dimension}.jsonl"
        with open(path, "r", encoding="utf-8") as f:
            # Skip blank lines (e.g. a stray trailing newline) instead of
            # letting json.loads fail on them.
            rows = [json.loads(line) for line in f if line.strip()]

        rows.sort(key=lambda row: (row["WISE"], row["SICR"]), reverse=True)

        with open(path, "w", encoding="utf-8") as f:
            for rank, row in enumerate(rows, start=1):
                # Rebuild the dict so that "Rank" is the first key (and thus
                # the first column), dropping any stale rank stored in the row.
                ordered = {"Rank": rank}
                ordered.update((k, v) for k, v in row.items() if k != "Rank")
                f.write(json.dumps(ordered) + "\n")
|
28 |
+
|
29 |
+
|
30 |
+
def generate_sample_data():
    """Append ten random sample rows to every dimension's leaderboard file.

    The models are named ``Model_0`` … ``Model_9``; each metric is a random
    value in [0, 1] rounded to two decimals. Rows are appended to
    ``all_dimensions/<dimension>.jsonl`` and carry no ``Rank`` key (``rerank``
    adds it).
    """
    model_names = [f"Model_{i}" for i in range(10)]
    metric_names = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    # Make sure the output directory exists so the first run does not fail.
    os.makedirs("all_dimensions", exist_ok=True)

    for dimension in dimensions:
        # Open each file once per dimension rather than once per record.
        with open(f"all_dimensions/{dimension}.jsonl", "a", encoding="utf-8") as f:
            for model_name in model_names:
                record = {"Model": make_clickable_model(model_name, "https://huggingface.co/")}
                for metric in metric_names:
                    record[metric] = round(random.uniform(0, 1), 2)
                f.write(json.dumps(record) + "\n")
|
49 |
+
|
50 |
+
|
51 |
+
def get_data(dimension):
    """Load one dimension's leaderboard as a DataFrame.

    Args:
        dimension: Dimension name; the rows are read from
            ``all_dimensions/<dimension>.jsonl`` (one JSON object per line).

    Returns:
        A ``pandas.DataFrame`` with one row per JSON line.

    Raises:
        FileNotFoundError: If the dimension file does not exist.
    """
    with open(f"all_dimensions/{dimension}.jsonl", "r", encoding="utf-8") as f:
        # Ignore blank lines so a stray newline does not break loading.
        rows = [json.loads(line) for line in f if line.strip()]

    return pd.DataFrame(rows)
|
56 |
+
|
57 |
+
|
58 |
+
def submit(json_file):
    """Validate an uploaded result file and stage it for the next refresh.

    The file is checked with ``check_json_file``. On success one record per
    dimension is appended to ``temp/<dimension>.jsonl``; ``refresh`` later
    merges those staged records into the leaderboard files.

    Args:
        json_file: Path of the uploaded JSON file.

    Returns:
        ``"Submission successful."`` when the records were staged, otherwise
        a message describing why the file was rejected.
    """
    flag, message = check_json_file(json_file)
    if not flag:
        return message

    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # "Model" is required below; report its absence instead of raising KeyError.
    if "Model" not in data:
        return "JSON file does not contain 'Model' key."

    # "in_huggingface_hub" and "Model Link" are treated as optional: a missing
    # key behaves like a falsy value.
    if data.get("in_huggingface_hub"):
        # NOTE(review): every Hub model links to the Hub home page rather than
        # to its own model page — confirm whether that is intended.
        model_name = make_clickable_model(data["Model"], "https://huggingface.co")
    elif data.get("Model Link"):
        model_name = make_clickable_model(data["Model"], data["Model Link"])
    else:
        model_name = data["Model"]

    metric_names = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    # Build every record before touching the disk so that a malformed
    # dimension cannot leave a partially staged submission behind.
    all_dimension_data = data["dimensions"]
    staged = {}
    for dimension in dimensions:
        each_dimension_data = all_dimension_data[dimension]
        record = {"Model": model_name}
        record.update((metric, each_dimension_data[metric]) for metric in metric_names)
        staged[dimension] = record

    # Append mode creates missing files, but the directory itself must exist.
    os.makedirs("temp", exist_ok=True)
    for dimension, record in staged.items():
        with open(f"temp/{dimension}.jsonl", "a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")

    return "Submission successful."
|
92 |
+
|
93 |
+
|
94 |
+
def refresh():
    """Merge staged submissions from ``temp/`` into the leaderboard, then re-rank.

    For every dimension that has a staged file ``temp/<dimension>.jsonl``,
    its records are merged into ``all_dimensions/<dimension>.jsonl``: a record
    whose ``Model`` equals an existing row replaces that row, otherwise it is
    appended. The staged file is deleted afterwards. ``rerank`` is called once
    at the end.

    Does nothing when ``temp/`` is missing or empty.
    """
    # Nothing to merge when the staging directory is absent or has no entries.
    if not os.path.isdir("temp") or is_empty("temp"):
        return

    for dimension in dimensions:
        pending_path = f"temp/{dimension}.jsonl"
        board_path = f"all_dimensions/{dimension}.jsonl"

        # The directory may hold files for only some dimensions (or unrelated
        # files); skip dimensions without staged data instead of crashing.
        if not os.path.exists(pending_path):
            continue

        # Read the staged submissions.
        with open(pending_path, "r", encoding="utf-8") as f:
            data = [json.loads(line) for line in f if line.strip()]

        # Read the current leaderboard rows.
        with open(board_path, "r", encoding="utf-8") as f:
            all_data = [json.loads(line) for line in f if line.strip()]

        # Overwrite the row of a model that is already listed; append new models.
        for d in data:
            for i, ad in enumerate(all_data):
                if ad["Model"] == d["Model"]:
                    all_data[i] = d
                    break
            else:
                all_data.append(d)

        with open(board_path, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(d) + "\n" for d in all_data)

        # The staged file has been merged; remove it.
        os.remove(pending_path)

    rerank()
|
118 |
+
|
119 |
+
|
120 |
+
def check_json_file(json_file):
    """Check that an uploaded file is a well-formed submission.

    A valid submission is a JSON object with a ``Model`` key and a
    ``dimensions`` object that contains, for every name in the module-level
    ``dimensions`` list, an object holding all eight metric keys.

    Args:
        json_file: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(is_valid, message)`` tuple; ``message`` explains the first
        problem found, or confirms validity.
    """
    required_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    # The submit button can be clicked before any file has been chosen.
    if json_file is None:
        return False, "No file was uploaded."

    try:
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are reported the same way as malformed JSON.
        return False, "JSON file is not valid JSON."

    # The top level must be an object holding the "dimensions" key.
    if not isinstance(data, dict) or "dimensions" not in data:
        return False, "JSON file does not contain 'dimensions' key."

    # `submit` reads data["Model"] unconditionally, so require it here.
    if "Model" not in data:
        return False, "JSON file does not contain 'Model' key."

    # Every known dimension ('Audience', 'Keyword', 'Format', 'Language',
    # 'Length', 'Source') must be present.
    all_dimension_data = data["dimensions"]
    if not isinstance(all_dimension_data, dict) or not all(
        d in all_dimension_data for d in dimensions
    ):
        return False, "JSON file does not contain all dimensions."

    # Each dimension must provide every metric shown on the leaderboard.
    for d in dimensions:
        each_dimension_data = all_dimension_data[d]
        if not isinstance(each_dimension_data, dict) or not all(
            k in each_dimension_data for k in required_keys
        ):
            return False, f"Dimension '{d}' does not contain all required keys."

    return True, "JSON file is valid."
|
144 |
+
|
145 |
+
|
146 |
+
def is_empty(dir_path):
    """Return True when ``dir_path`` contains no entries.

    A directory that does not exist is reported as empty, so callers that
    only want to know whether there is anything to process need no separate
    existence check.

    Args:
        dir_path: Path of the directory to inspect.

    Returns:
        ``True`` if the directory is missing or has no entries, else ``False``.
    """
    try:
        # Stop at the first entry instead of listing the whole directory.
        with os.scandir(dir_path) as entries:
            return next(entries, None) is None
    except FileNotFoundError:
        return True
|