Thun09 committed on
Commit
ee9a738
·
1 Parent(s): 66f693f

Update space

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/Leaderboard.iml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="PyDocumentationSettings">
9
+ <option name="format" value="PLAIN" />
10
+ <option name="myDocStringFormat" value="Plain" />
11
+ </component>
12
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredIdentifiers">
6
+ <list>
7
+ <option value="db_file" />
8
+ </list>
9
+ </option>
10
+ </inspection_tool>
11
+ </profile>
12
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="leaderboard" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="leaderboard" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/Leaderboard.iml" filepath="$PROJECT_DIR$/.idea/Leaderboard.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
all_dimensions/Audience.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.99, "SICR": 0.94, "nDCG@10(Original)": 0.89, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.97, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.09}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.84, "SICR": 0.21, "nDCG@10(Original)": 0.5, "nDCG@10(Instructed)": 0.68, "nDCG@10(Reversely Instructed)": 0.86, "MRR@1(Original)": 0.79, "MRR@1(Instructed)": 0.91, "MRR@1(Reversely Instructed)": 0.12}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.71, "SICR": 0.49, "nDCG@10(Original)": 0.74, "nDCG@10(Instructed)": 0.35, "nDCG@10(Reversely Instructed)": 0.56, "MRR@1(Original)": 0.34, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.27}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.52, "SICR": 0.99, "nDCG@10(Original)": 0.08, "nDCG@10(Instructed)": 0.2, "nDCG@10(Reversely Instructed)": 0.55, "MRR@1(Original)": 0.09, "MRR@1(Instructed)": 0.58, "MRR@1(Reversely Instructed)": 0.59}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.39, "SICR": 0.61, "nDCG@10(Original)": 0.21, "nDCG@10(Instructed)": 0.46, "nDCG@10(Reversely Instructed)": 0.13, "MRR@1(Original)": 0.35, "MRR@1(Instructed)": 0.8, "MRR@1(Reversely Instructed)": 0.1}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.35, "SICR": 0.82, "nDCG@10(Original)": 0.42, "nDCG@10(Instructed)": 0.92, "nDCG@10(Reversely Instructed)": 0.17, "MRR@1(Original)": 0.39, "MRR@1(Instructed)": 0.24, "MRR@1(Reversely Instructed)": 0.32}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.26, "SICR": 0.17, "nDCG@10(Original)": 0.68, "nDCG@10(Instructed)": 0.73, "nDCG@10(Reversely Instructed)": 0.58, "MRR@1(Original)": 0.13, "MRR@1(Instructed)": 0.62, "MRR@1(Reversely Instructed)": 0.9}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.22, "SICR": 0.69, "nDCG@10(Original)": 0.44, "nDCG@10(Instructed)": 0.25, "nDCG@10(Reversely Instructed)": 0.99, "MRR@1(Original)": 0.41, "MRR@1(Instructed)": 0.35, "MRR@1(Reversely Instructed)": 0.16}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.13, "SICR": 0.13, "nDCG@10(Original)": 0.71, "nDCG@10(Instructed)": 0.85, "nDCG@10(Reversely Instructed)": 0.09, "MRR@1(Original)": 0.08, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.09}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.01, "SICR": 0.65, "nDCG@10(Original)": 0.01, "nDCG@10(Instructed)": 0.35, "nDCG@10(Reversely Instructed)": 0.82, "MRR@1(Original)": 0.69, "MRR@1(Instructed)": 0.94, "MRR@1(Reversely Instructed)": 0.96}
all_dimensions/Format.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.92, "SICR": 0.33, "nDCG@10(Original)": 0.02, "nDCG@10(Instructed)": 0.23, "nDCG@10(Reversely Instructed)": 0.65, "MRR@1(Original)": 0.1, "MRR@1(Instructed)": 0.1, "MRR@1(Reversely Instructed)": 0.51}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.88, "SICR": 0.35, "nDCG@10(Original)": 0.69, "nDCG@10(Instructed)": 0.48, "nDCG@10(Reversely Instructed)": 0.25, "MRR@1(Original)": 0.3, "MRR@1(Instructed)": 0.92, "MRR@1(Reversely Instructed)": 0.03}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.81, "SICR": 0.32, "nDCG@10(Original)": 0.22, "nDCG@10(Instructed)": 0.67, "nDCG@10(Reversely Instructed)": 0.79, "MRR@1(Original)": 0.59, "MRR@1(Instructed)": 0.97, "MRR@1(Reversely Instructed)": 0.05}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.7, "SICR": 0.69, "nDCG@10(Original)": 0.82, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.08, "MRR@1(Original)": 0.57, "MRR@1(Instructed)": 0.54, "MRR@1(Reversely Instructed)": 0.34}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.67, "SICR": 0.04, "nDCG@10(Original)": 0.61, "nDCG@10(Instructed)": 0.28, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.85, "MRR@1(Reversely Instructed)": 0.09}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.56, "SICR": 0.86, "nDCG@10(Original)": 0.98, "nDCG@10(Instructed)": 0.22, "nDCG@10(Reversely Instructed)": 0.95, "MRR@1(Original)": 0.69, "MRR@1(Instructed)": 0.63, "MRR@1(Reversely Instructed)": 0.22}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.33, "SICR": 0.92, "nDCG@10(Original)": 0.38, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.09, "MRR@1(Original)": 0.26, "MRR@1(Instructed)": 0.89, "MRR@1(Reversely Instructed)": 0.79}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.21, "SICR": 0.82, "nDCG@10(Original)": 0.85, "nDCG@10(Instructed)": 0.97, "nDCG@10(Reversely Instructed)": 0.69, "MRR@1(Original)": 0.81, "MRR@1(Instructed)": 0.49, "MRR@1(Reversely Instructed)": 0.43}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.15, "SICR": 0.94, "nDCG@10(Original)": 0.02, "nDCG@10(Instructed)": 0.9, "nDCG@10(Reversely Instructed)": 0.95, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.32, "MRR@1(Reversely Instructed)": 0.12}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.11, "SICR": 0.06, "nDCG@10(Original)": 0.09, "nDCG@10(Instructed)": 0.72, "nDCG@10(Reversely Instructed)": 0.61, "MRR@1(Original)": 0.16, "MRR@1(Instructed)": 0.29, "MRR@1(Reversely Instructed)": 0.61}
all_dimensions/Keyword.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.83, "SICR": 0.8, "nDCG@10(Original)": 0.61, "nDCG@10(Instructed)": 0.19, "nDCG@10(Reversely Instructed)": 0.46, "MRR@1(Original)": 0.13, "MRR@1(Instructed)": 0.37, "MRR@1(Reversely Instructed)": 0.97}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.78, "SICR": 0.56, "nDCG@10(Original)": 0.73, "nDCG@10(Instructed)": 0.49, "nDCG@10(Reversely Instructed)": 0.34, "MRR@1(Original)": 0.18, "MRR@1(Instructed)": 0.2, "MRR@1(Reversely Instructed)": 0.52}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.76, "SICR": 0.12, "nDCG@10(Original)": 0.05, "nDCG@10(Instructed)": 0.24, "nDCG@10(Reversely Instructed)": 0.89, "MRR@1(Original)": 0.03, "MRR@1(Instructed)": 0.99, "MRR@1(Reversely Instructed)": 0.45}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.73, "SICR": 0.35, "nDCG@10(Original)": 0.31, "nDCG@10(Instructed)": 0.18, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.01, "MRR@1(Instructed)": 0.26, "MRR@1(Reversely Instructed)": 0.73}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.62, "SICR": 0.58, "nDCG@10(Original)": 0.66, "nDCG@10(Instructed)": 0.58, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.64, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.68}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.54, "SICR": 0.42, "nDCG@10(Original)": 0.34, "nDCG@10(Instructed)": 0.03, "nDCG@10(Reversely Instructed)": 0.88, "MRR@1(Original)": 0.99, "MRR@1(Instructed)": 0.27, "MRR@1(Reversely Instructed)": 0.37}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.47, "SICR": 0.43, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.61, "nDCG@10(Reversely Instructed)": 0.49, "MRR@1(Original)": 0.23, "MRR@1(Instructed)": 0.94, "MRR@1(Reversely Instructed)": 0.41}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.34, "SICR": 0.49, "nDCG@10(Original)": 0.15, "nDCG@10(Instructed)": 0.98, "nDCG@10(Reversely Instructed)": 0.9, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.29, "MRR@1(Reversely Instructed)": 0.73}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.14, "SICR": 0.92, "nDCG@10(Original)": 0.39, "nDCG@10(Instructed)": 0.1, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.35, "MRR@1(Instructed)": 0.36, "MRR@1(Reversely Instructed)": 1.0}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.06, "SICR": 0.16, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.44, "nDCG@10(Reversely Instructed)": 0.53, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.64, "MRR@1(Reversely Instructed)": 0.14}
all_dimensions/Language.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.74, "SICR": 0.41, "nDCG@10(Original)": 0.97, "nDCG@10(Instructed)": 0.43, "nDCG@10(Reversely Instructed)": 0.2, "MRR@1(Original)": 0.45, "MRR@1(Instructed)": 0.41, "MRR@1(Reversely Instructed)": 0.88}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.65, "SICR": 0.88, "nDCG@10(Original)": 0.75, "nDCG@10(Instructed)": 0.14, "nDCG@10(Reversely Instructed)": 0.43, "MRR@1(Original)": 0.05, "MRR@1(Instructed)": 0.75, "MRR@1(Reversely Instructed)": 0.12}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.65, "SICR": 0.25, "nDCG@10(Original)": 0.95, "nDCG@10(Instructed)": 0.14, "nDCG@10(Reversely Instructed)": 0.94, "MRR@1(Original)": 0.81, "MRR@1(Instructed)": 0.37, "MRR@1(Reversely Instructed)": 0.28}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.48, "SICR": 0.38, "nDCG@10(Original)": 0.37, "nDCG@10(Instructed)": 0.32, "nDCG@10(Reversely Instructed)": 0.29, "MRR@1(Original)": 0.26, "MRR@1(Instructed)": 0.51, "MRR@1(Reversely Instructed)": 0.84}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.24, "SICR": 0.26, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.89, "nDCG@10(Reversely Instructed)": 0.62, "MRR@1(Original)": 0.73, "MRR@1(Instructed)": 0.84, "MRR@1(Reversely Instructed)": 0.79}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.22, "SICR": 0.77, "nDCG@10(Original)": 0.71, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.21, "MRR@1(Original)": 0.22, "MRR@1(Instructed)": 0.82, "MRR@1(Reversely Instructed)": 0.2}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.18, "SICR": 0.25, "nDCG@10(Original)": 0.53, "nDCG@10(Instructed)": 0.64, "nDCG@10(Reversely Instructed)": 0.58, "MRR@1(Original)": 0.32, "MRR@1(Instructed)": 0.41, "MRR@1(Reversely Instructed)": 0.15}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.09, "SICR": 0.43, "nDCG@10(Original)": 0.82, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.43, "MRR@1(Original)": 0.03, "MRR@1(Instructed)": 0.53, "MRR@1(Reversely Instructed)": 0.83}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.07, "SICR": 0.14, "nDCG@10(Original)": 0.17, "nDCG@10(Instructed)": 0.86, "nDCG@10(Reversely Instructed)": 0.9, "MRR@1(Original)": 0.93, "MRR@1(Instructed)": 0.99, "MRR@1(Reversely Instructed)": 0.96}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.05, "SICR": 0.29, "nDCG@10(Original)": 0.45, "nDCG@10(Instructed)": 0.74, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.23, "MRR@1(Instructed)": 0.65, "MRR@1(Reversely Instructed)": 0.46}
all_dimensions/Length.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.98, "SICR": 0.77, "nDCG@10(Original)": 0.28, "nDCG@10(Instructed)": 0.09, "nDCG@10(Reversely Instructed)": 0.32, "MRR@1(Original)": 0.25, "MRR@1(Instructed)": 0.22, "MRR@1(Reversely Instructed)": 0.81}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.97, "SICR": 0.51, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.41, "nDCG@10(Reversely Instructed)": 0.4, "MRR@1(Original)": 0.21, "MRR@1(Instructed)": 0.14, "MRR@1(Reversely Instructed)": 0.9}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.95, "SICR": 0.8, "nDCG@10(Original)": 0.72, "nDCG@10(Instructed)": 0.32, "nDCG@10(Reversely Instructed)": 0.08, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.05, "MRR@1(Reversely Instructed)": 0.3}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.89, "SICR": 0.35, "nDCG@10(Original)": 0.65, "nDCG@10(Instructed)": 0.19, "nDCG@10(Reversely Instructed)": 0.88, "MRR@1(Original)": 0.06, "MRR@1(Instructed)": 0.9, "MRR@1(Reversely Instructed)": 0.02}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.78, "SICR": 0.18, "nDCG@10(Original)": 0.28, "nDCG@10(Instructed)": 0.89, "nDCG@10(Reversely Instructed)": 0.34, "MRR@1(Original)": 0.8, "MRR@1(Instructed)": 0.49, "MRR@1(Reversely Instructed)": 0.01}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.77, "SICR": 0.46, "nDCG@10(Original)": 0.98, "nDCG@10(Instructed)": 0.99, "nDCG@10(Reversely Instructed)": 0.19, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.11, "MRR@1(Reversely Instructed)": 0.36}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.75, "SICR": 0.88, "nDCG@10(Original)": 0.07, "nDCG@10(Instructed)": 0.75, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.57, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.38}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.61, "SICR": 0.27, "nDCG@10(Original)": 0.1, "nDCG@10(Instructed)": 0.11, "nDCG@10(Reversely Instructed)": 0.78, "MRR@1(Original)": 0.3, "MRR@1(Instructed)": 0.96, "MRR@1(Reversely Instructed)": 0.58}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.2, "SICR": 0.73, "nDCG@10(Original)": 0.68, "nDCG@10(Instructed)": 0.01, "nDCG@10(Reversely Instructed)": 0.12, "MRR@1(Original)": 0.7, "MRR@1(Instructed)": 0.62, "MRR@1(Reversely Instructed)": 0.79}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.08, "SICR": 0.21, "nDCG@10(Original)": 0.92, "nDCG@10(Instructed)": 0.91, "nDCG@10(Reversely Instructed)": 0.81, "MRR@1(Original)": 0.16, "MRR@1(Instructed)": 0.01, "MRR@1(Reversely Instructed)": 0.22}
all_dimensions/Source.jsonl ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"Rank": 1, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_9</a>", "WISE": 0.84, "SICR": 0.72, "nDCG@10(Original)": 0.31, "nDCG@10(Instructed)": 0.18, "nDCG@10(Reversely Instructed)": 0.75, "MRR@1(Original)": 0.48, "MRR@1(Instructed)": 0.98, "MRR@1(Reversely Instructed)": 0.86}
2
+ {"Rank": 2, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_3</a>", "WISE": 0.69, "SICR": 0.62, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.53, "nDCG@10(Reversely Instructed)": 0.5, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.67, "MRR@1(Reversely Instructed)": 0.81}
3
+ {"Rank": 3, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_0</a>", "WISE": 0.61, "SICR": 0.84, "nDCG@10(Original)": 0.55, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.03, "MRR@1(Original)": 0.24, "MRR@1(Instructed)": 0.59, "MRR@1(Reversely Instructed)": 0.15}
4
+ {"Rank": 4, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_7</a>", "WISE": 0.6, "SICR": 0.86, "nDCG@10(Original)": 0.46, "nDCG@10(Instructed)": 0.8, "nDCG@10(Reversely Instructed)": 0.21, "MRR@1(Original)": 0.55, "MRR@1(Instructed)": 0.26, "MRR@1(Reversely Instructed)": 0.42}
5
+ {"Rank": 5, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_6</a>", "WISE": 0.55, "SICR": 0.73, "nDCG@10(Original)": 0.23, "nDCG@10(Instructed)": 0.56, "nDCG@10(Reversely Instructed)": 0.47, "MRR@1(Original)": 0.89, "MRR@1(Instructed)": 0.23, "MRR@1(Reversely Instructed)": 0.83}
6
+ {"Rank": 6, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_1</a>", "WISE": 0.48, "SICR": 0.1, "nDCG@10(Original)": 0.12, "nDCG@10(Instructed)": 0.56, "nDCG@10(Reversely Instructed)": 0.93, "MRR@1(Original)": 0.67, "MRR@1(Instructed)": 0.66, "MRR@1(Reversely Instructed)": 0.02}
7
+ {"Rank": 7, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_4</a>", "WISE": 0.32, "SICR": 0.77, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.83, "nDCG@10(Reversely Instructed)": 0.13, "MRR@1(Original)": 0.14, "MRR@1(Instructed)": 0.0, "MRR@1(Reversely Instructed)": 0.08}
8
+ {"Rank": 8, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_8</a>", "WISE": 0.29, "SICR": 0.42, "nDCG@10(Original)": 0.45, "nDCG@10(Instructed)": 0.31, "nDCG@10(Reversely Instructed)": 0.7, "MRR@1(Original)": 0.66, "MRR@1(Instructed)": 0.12, "MRR@1(Reversely Instructed)": 0.44}
9
+ {"Rank": 9, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_2</a>", "WISE": 0.17, "SICR": 0.91, "nDCG@10(Original)": 0.76, "nDCG@10(Instructed)": 0.78, "nDCG@10(Reversely Instructed)": 0.7, "MRR@1(Original)": 0.77, "MRR@1(Instructed)": 0.88, "MRR@1(Reversely Instructed)": 0.55}
10
+ {"Rank": 10, "Model": "<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https://huggingface.co/\">Model_5</a>", "WISE": 0.13, "SICR": 0.44, "nDCG@10(Original)": 0.4, "nDCG@10(Instructed)": 0.93, "nDCG@10(Reversely Instructed)": 0.28, "MRR@1(Original)": 0.68, "MRR@1(Instructed)": 0.43, "MRR@1(Reversely Instructed)": 0.44}
app.py CHANGED
@@ -2,201 +2,68 @@ import gradio as gr
2
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
 
31
 
32
  def restart_space():
33
  API.restart_space(repo_id=REPO_ID)
34
 
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
-
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
  submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
190
-
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
 
201
  scheduler = BackgroundScheduler()
202
  scheduler.add_job(restart_space, "interval", seconds=1800)
 
2
  from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
7
+
8
+ from utils import get_data, submit
9
 
10
 
11
  def restart_space():
12
  API.restart_space(repo_id=REPO_ID)
13
 
14
+
15
+ dimensions = ['Audience', 'Keyword', 'Format', 'Language', 'Length', 'Source']
16
+
17
+ display_columns = [
18
+ "Rank", "Model", "WISE", "SICR", "nDCG@10(Original)", "nDCG@10(Instructed)",
19
+ "nDCG@10(Reversely Instructed)", "MRR@1(Original)", "MRR@1(Instructed)",
20
+ "MRR@1(Reversely Instructed)"
21
+ ]
22
+
23
+ data_type = ["number", "markdown", "number", "number", "number", "number", "number", "number", "number", "number"]
24
+
25
+ css = """
26
+ table > thead {
27
+ white-space: normal
28
+ }
29
+
30
+ table {
31
+ --cell-width-1: 250px
32
+ }
33
+
34
+ table > tbody > tr > td:nth-child(2) > div {
35
+ overflow-x: auto
36
+ }
37
+
38
+ .filter-checkbox-group {
39
+ max-width: max-content;
40
+ }
41
+
42
+ .fixed-height-table {
43
+ height: 100px;
44
+ overflow-y: scroll;
45
+ }
46
+
47
+ """
48
+
49
+ # create Gradio interface
50
+ with gr.Blocks() as demo:
51
+ gr.Markdown("# 🤗 InfoSearch Benchmark Leaderboard")
52
+ with gr.Tabs() as tabs:
53
+ with gr.TabItem("🏅 InfoSearch Benchmark"):
54
+ for dimension in dimensions:
55
+ with gr.Tab(dimension):
56
+ data = get_data(f"{dimension}")
57
+ gr.Dataframe(data,
58
+ headers=display_columns,
59
+ datatype=data_type,
60
+ interactive=False, elem_classes=["fixed-height-table"])
61
+ with gr.TabItem("🚀 Submit here!"):
62
+ gr.Markdown("✉️✨ Submit your model here.")
63
+ file_upload = gr.File(label="Upload your JSON file", file_types=["json"])
64
+ submit_button = gr.Button("Submit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  submission_result = gr.Markdown()
66
+ submit_button.click(submit, file_upload, submission_result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  scheduler = BackgroundScheduler()
69
  scheduler.add_job(restart_space, "interval", seconds=1800)
utils.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import json
4
+ import pandas as pd
5
+
6
+ dimensions = ['Audience', 'Keyword', 'Format', 'Language', 'Length', 'Source']
7
+
8
+
9
def make_clickable_model(model_name, link):
    """Return an HTML anchor that shows ``model_name`` and opens ``link`` in a new tab.

    Both values are HTML-escaped before interpolation because they can come
    from user-uploaded submission files; unescaped text could break out of the
    ``href`` attribute or inject markup into the rendered table.

    Args:
        model_name: Text displayed for the link.
        link: Target URL placed in the ``href`` attribute.

    Returns:
        The anchor element as a string.
    """
    import html  # local import keeps this block self-contained

    safe_name = html.escape(str(model_name), quote=True)
    safe_link = html.escape(str(link), quote=True)
    return f'<a target="_blank" style="text-decoration: underline" href="{safe_link}">{safe_name}</a>'
11
+
12
+
13
def rerank():
    """Re-sort every dimension's leaderboard file and renumber its ranks.

    For each name in ``dimensions`` the file ``all_dimensions/<name>.jsonl`` is
    read, its records are ordered by ``(WISE, SICR)`` descending, and the file
    is rewritten with a 1-based ``Rank`` as the first key of every record.
    """
    for name in dimensions:
        path = f"all_dimensions/{name}.jsonl"
        with open(path, "r") as src:
            records = list(map(json.loads, src))

        ranked = sorted(records, key=lambda rec: (rec["WISE"], rec["SICR"]), reverse=True)

        with open(path, "w") as dst:
            for position, rec in enumerate(ranked, start=1):
                # Rebuild the record so that "Rank" is emitted as the first key.
                remainder = {key: value for key, value in rec.items() if key != "Rank"}
                dst.write(json.dumps({"Rank": position, **remainder}) + "\n")
28
+
29
+
30
def generate_sample_data():
    """Append ten randomly scored placeholder models to every dimension file.

    For each name in ``dimensions``, one JSON line per model ``Model_0`` ..
    ``Model_9`` is appended to ``all_dimensions/<name>.jsonl``. Every metric is
    a random value in [0, 1] rounded to two decimals.
    """
    metric_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )
    model_names = [f"Model_{index}" for index in range(10)]

    for dimension in dimensions:
        target = f"all_dimensions/{dimension}.jsonl"
        for model_name in model_names:
            row = {"Model": make_clickable_model(model_name, "https://huggingface.co/")}
            # Draw the metrics in a fixed key order, one random value per key.
            for key in metric_keys:
                row[key] = round(random.uniform(0, 1), 2)
            with open(target, "a") as out:
                out.write(json.dumps(row) + "\n")
49
+
50
+
51
def get_data(dimension):
    """Load ``all_dimensions/<dimension>.jsonl`` into a DataFrame, one row per JSON line."""
    records = []
    with open(f"all_dimensions/{dimension}.jsonl", "r") as handle:
        for raw_line in handle:
            records.append(json.loads(raw_line))
    return pd.DataFrame(records)
56
+
57
+
58
def submit(json_file):
    """Validate an uploaded result file and queue its scores under ``temp/``.

    The file is checked with ``check_json_file``. On success, one JSON line per
    dimension is appended to ``temp/<dimension>.jsonl``.

    The "Model" cell is a link when one is available: models flagged
    ``in_huggingface_hub`` link to ``https://huggingface.co/<Model>``, others use
    their ``"Model Link"`` value, and models with neither are stored as plain
    text.

    Args:
        json_file: Path of the uploaded JSON file, or ``None`` when nothing was
            uploaded.

    Returns:
        A status message: ``"Submission successful."`` or the reason the file
        was rejected.
    """
    metric_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    # Clicking "Submit" without selecting a file yields no path at all.
    if json_file is None:
        return "No file uploaded."

    flag, message = check_json_file(json_file)
    if not flag:
        return message

    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    model = data.get("Model")
    if not model:
        return "JSON file does not contain 'Model' key."

    # Optional keys are read with .get() so their absence is not an error.
    if data.get("in_huggingface_hub"):
        model_name = make_clickable_model(model, f"https://huggingface.co/{model}")
    elif data.get("Model Link"):
        model_name = make_clickable_model(model, data["Model Link"])
    else:
        model_name = model

    # Build every row before touching the disk so that a malformed dimension
    # cannot leave a partially written submission behind.
    all_dimension_data = data["dimensions"]
    rows = {}
    for dimension in dimensions:
        each_dimension_data = all_dimension_data[dimension]
        row = {"Model": model_name}
        row.update((key, each_dimension_data[key]) for key in metric_keys)
        rows[dimension] = row

    # Append mode creates missing files; only the directory has to exist.
    os.makedirs("temp", exist_ok=True)
    for dimension, row in rows.items():
        with open(f"temp/{dimension}.jsonl", "a") as f:
            f.write(json.dumps(row) + "\n")
    return "Submission successful."
92
+
93
+
94
def refresh():
    """Merge queued submissions from ``temp/`` into the leaderboard files and re-rank.

    For every dimension with a queued ``temp/<dimension>.jsonl`` file, its
    records are merged into ``all_dimensions/<dimension>.jsonl``: a record whose
    "Model" already exists replaces the existing entry, otherwise it is
    appended. The queued file is then deleted. ``rerank`` runs once all
    dimensions are processed.

    Does nothing when ``temp/`` is missing or empty. Dimensions without a queued
    file are skipped, and a missing leaderboard file is treated as empty.
    """
    if not os.path.isdir("temp") or is_empty("temp"):
        return

    for dimension in dimensions:
        pending_path = f"temp/{dimension}.jsonl"
        board_path = f"all_dimensions/{dimension}.jsonl"

        # Nothing queued for this dimension.
        if not os.path.exists(pending_path):
            continue

        # Read the queued submissions, tolerating blank lines.
        with open(pending_path, "r") as f:
            data = [json.loads(line) for line in f if line.strip()]

        # Read the current leaderboard, if there is one yet.
        if os.path.exists(board_path):
            with open(board_path, "r") as f:
                all_data = [json.loads(line) for line in f if line.strip()]
        else:
            all_data = []

        # Index of the first entry for each model, so an existing model is
        # overwritten in place and a new one is appended.
        position = {}
        for i, ad in enumerate(all_data):
            position.setdefault(ad["Model"], i)
        for d in data:
            i = position.get(d["Model"])
            if i is None:
                position[d["Model"]] = len(all_data)
                all_data.append(d)
            else:
                all_data[i] = d

        os.makedirs("all_dimensions", exist_ok=True)
        with open(board_path, "w") as f:
            for d in all_data:
                f.write(json.dumps(d) + "\n")

        # The queue for this dimension has been consumed.
        os.remove(pending_path)

    rerank()
118
+
119
+
120
def check_json_file(json_file):
    """Check that an uploaded file is a well-formed leaderboard submission.

    The file must contain a JSON object with a ``"dimensions"`` object that has
    an entry for every name in ``dimensions``. Each entry must be an object
    holding all eight metric keys. A ``"Model"`` key is also required.

    Args:
        json_file: Path of the file to validate, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(is_valid, message)`` tuple; ``message`` explains the first problem
        found, or confirms validity.
    """
    required_keys = (
        "WISE",
        "SICR",
        "nDCG@10(Original)",
        "nDCG@10(Instructed)",
        "nDCG@10(Reversely Instructed)",
        "MRR@1(Original)",
        "MRR@1(Instructed)",
        "MRR@1(Reversely Instructed)",
    )

    if json_file is None:
        return False, "No file uploaded."

    try:
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Binary or otherwise undecodable uploads are reported the same way as
        # syntactically broken JSON.
        return False, "JSON file is not valid JSON."

    # The top level must be an object containing a "dimensions" key.
    if not isinstance(data, dict) or "dimensions" not in data:
        return False, "JSON file does not contain 'dimensions' key."

    # Every expected dimension must be present.
    all_dimension_data = data["dimensions"]
    if not isinstance(all_dimension_data, dict) or not all(
        d in all_dimension_data for d in dimensions
    ):
        return False, "JSON file does not contain all dimensions."

    # Every dimension must provide all required metrics.
    for d in dimensions:
        each_dimension_data = all_dimension_data[d]
        if not isinstance(each_dimension_data, dict) or not all(
            k in each_dimension_data for k in required_keys
        ):
            return False, f"Dimension '{d}' does not contain all required keys."

    # The model name is needed to label the leaderboard row.
    if "Model" not in data:
        return False, "JSON file does not contain 'Model' key."

    return True, "JSON file is valid."
144
+
145
+
146
def is_empty(dir_path):
    """Return ``True`` when the directory ``dir_path`` contains no entries."""
    entries = os.listdir(dir_path)
    return not entries