Spaces:
Sleeping
Sleeping
XufengDuan
committed on
Commit
•
6d8d412
1
Parent(s):
efa3bcc
update scripts
Browse files
- src/display/about.py +22 -22
src/display/about.py
CHANGED
@@ -12,26 +12,26 @@ class Tasks(Enum):
|
|
12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
15 |
-
E1 = Task("E1", "E1", "
|
16 |
-
E1_ci = Task("E1_ci", "E1_ci", "
|
17 |
-
E2 = Task("E2", "E2", "
|
18 |
-
E2_ci = Task("E2_ci", "E2_ci", "
|
19 |
-
E3 = Task("E3", "E3", "
|
20 |
-
E3_ci = Task("E3_ci", "E3_ci", "
|
21 |
-
E4 = Task("E4", "E4", "
|
22 |
-
E4_ci = Task("E4_ci", "E4_ci", "
|
23 |
-
E5 = Task("E5", "E5", "
|
24 |
-
E5_ci = Task("E5_ci", "E5_ci", "
|
25 |
-
E6 = Task("E6", "E6", "
|
26 |
-
E6_ci = Task("E6_ci", "E6_ci", "
|
27 |
-
E7 = Task("E7", "E7", "
|
28 |
-
E7_ci = Task("E7_ci", "E7_ci", "
|
29 |
-
E8 = Task("E8", "E8", "
|
30 |
-
E8_ci = Task("E8_ci", "E8_ci", "
|
31 |
-
E9 = Task("E9", "E9", "
|
32 |
-
E9_ci = Task("E9_ci", "E9_ci", "
|
33 |
-
E10 = Task("E10", "E10", "
|
34 |
-
E10_ci = Task("E10_ci", "E10_ci", "
|
35 |
|
36 |
|
37 |
|
@@ -51,8 +51,8 @@ LLM_BENCHMARKS_TEXT = """
|
|
51 |
|
52 |
This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
|
53 |
|
54 |
-
1. **
|
55 |
-
2. **
|
56 |
3. **Word:** Word Length and Predictivity<br>
|
57 |
4. **Word:** Word Meaning Priming<br>
|
58 |
5. **Syntax:** Structural Priming<br>
|
|
|
12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
15 |
+
E1 = Task("E1", "E1", "Sound-1")
|
16 |
+
E1_ci = Task("E1_ci", "E1_ci", "Sound-1 CI")
|
17 |
+
E2 = Task("E2", "E2", "Sound-2")
|
18 |
+
E2_ci = Task("E2_ci", "E2_ci", "Sound-2 CI")
|
19 |
+
E3 = Task("E3", "E3", "Word-1")
|
20 |
+
E3_ci = Task("E3_ci", "E3_ci", "Word-1 CI")
|
21 |
+
E4 = Task("E4", "E4", "Word-2")
|
22 |
+
E4_ci = Task("E4_ci", "E4_ci", "Word-2 CI")
|
23 |
+
E5 = Task("E5", "E5", "Syntax-1")
|
24 |
+
E5_ci = Task("E5_ci", "E5_ci", "Syntax-1 CI")
|
25 |
+
E6 = Task("E6", "E6", "Syntax-2")
|
26 |
+
E6_ci = Task("E6_ci", "E6_ci", "Syntax-2 CI")
|
27 |
+
E7 = Task("E7", "E7", "Meaning-1")
|
28 |
+
E7_ci = Task("E7_ci", "E7_ci", "Meaning-1 CI")
|
29 |
+
E8 = Task("E8", "E8", "Meaning-2")
|
30 |
+
E8_ci = Task("E8_ci", "E8_ci", "Meaning-2 CI")
|
31 |
+
E9 = Task("E9", "E9", "Discourse-1")
|
32 |
+
E9_ci = Task("E9_ci", "E9_ci", "Discourse-1 CI")
|
33 |
+
E10 = Task("E10", "E10", "Discourse-2")
|
34 |
+
E10_ci = Task("E10_ci", "E10_ci", "Discourse-2 CI")
|
35 |
|
36 |
|
37 |
|
|
|
51 |
|
52 |
This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
|
53 |
|
54 |
+
1. **Sound:** Sound Shape Association<br>
|
55 |
+
2. **Sound:** Sound Gender Association<br>
|
56 |
3. **Word:** Word Length and Predictivity<br>
|
57 |
4. **Word:** Word Meaning Priming<br>
|
58 |
5. **Syntax:** Structural Priming<br>
|