XufengDuan committed on
Commit
6d8d412
1 Parent(s): efa3bcc

update scripts

Browse files
Files changed (1) hide show
  1. src/display/about.py +22 -22
src/display/about.py CHANGED
@@ -12,26 +12,26 @@ class Tasks(Enum):
12
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
13
  Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
14
  Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
15
- E1 = Task("E1", "E1", "E1 Humanlike %")
16
- E1_ci = Task("E1_ci", "E1_ci", "E1 CI")
17
- E2 = Task("E2", "E2", "E2 Humanlike %")
18
- E2_ci = Task("E2_ci", "E2_ci", "E2 CI")
19
- E3 = Task("E3", "E3", "E3 Humanlike %")
20
- E3_ci = Task("E3_ci", "E3_ci", "E3 CI")
21
- E4 = Task("E4", "E4", "E4 Humanlike %")
22
- E4_ci = Task("E4_ci", "E4_ci", "E4 CI")
23
- E5 = Task("E5", "E5", "E5 Humanlike %")
24
- E5_ci = Task("E5_ci", "E5_ci", "E5 CI")
25
- E6 = Task("E6", "E6", "E6 Humanlike %")
26
- E6_ci = Task("E6_ci", "E6_ci", "E6 CI")
27
- E7 = Task("E7", "E7", "E7 Humanlike %")
28
- E7_ci = Task("E7_ci", "E7_ci", "E7 CI")
29
- E8 = Task("E8", "E8", "E8 Humanlike %")
30
- E8_ci = Task("E8_ci", "E8_ci", "E8 CI")
31
- E9 = Task("E9", "E9", "E9 Humanlike %")
32
- E9_ci = Task("E9_ci", "E9_ci", "E9 CI")
33
- E10 = Task("E10", "E10", "E10 Humanlike %")
34
- E10_ci = Task("E10_ci", "E10_ci", "E10 CI")
35
 
36
 
37
 
@@ -51,8 +51,8 @@ LLM_BENCHMARKS_TEXT = """
51
 
52
  This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
53
 
54
- 1. **Sounds:** Sound Shape Association<br>
55
- 2. **Sounds:** Sound Gender Association<br>
56
  3. **Word:** Word Length and Predictivity<br>
57
  4. **Word:** Word Meaning Priming<br>
58
  5. **Syntax:** Structural Priming<br>
 
12
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
13
  Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
14
  Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
15
+ E1 = Task("E1", "E1", "Sound-1")
16
+ E1_ci = Task("E1_ci", "E1_ci", "Sound-1 CI")
17
+ E2 = Task("E2", "E2", "Sound-2")
18
+ E2_ci = Task("E2_ci", "E2_ci", "Sound-2 CI")
19
+ E3 = Task("E3", "E3", "Word-1")
20
+ E3_ci = Task("E3_ci", "E3_ci", "Word-1 CI")
21
+ E4 = Task("E4", "E4", "Word-2")
22
+ E4_ci = Task("E4_ci", "E4_ci", "Word-2 CI")
23
+ E5 = Task("E5", "E5", "Syntax-1")
24
+ E5_ci = Task("E5_ci", "E5_ci", "Syntax-1 CI")
25
+ E6 = Task("E6", "E6", "Syntax-2")
26
+ E6_ci = Task("E6_ci", "E6_ci", "Syntax-2 CI")
27
+ E7 = Task("E7", "E7", "Meaning-1")
28
+ E7_ci = Task("E7_ci", "E7_ci", "Meaning-1 CI")
29
+ E8 = Task("E8", "E8", "Meaning-2")
30
+ E8_ci = Task("E8_ci", "E8_ci", "Meaning-2 CI")
31
+ E9 = Task("E9", "E9", "Discourse-1")
32
+ E9_ci = Task("E9_ci", "E9_ci", "Discourse-1 CI")
33
+ E10 = Task("E10", "E10", "Discourse-2")
34
+ E10_ci = Task("E10_ci", "E10_ci", "Discourse-2 CI")
35
 
36
 
37
 
 
51
 
52
  This study aims to compare the similarities between human and model responses in language use by employing ten psycholinguistic tasks:
53
 
54
+ 1. **Sound:** Sound Shape Association<br>
55
+ 2. **Sound:** Sound Gender Association<br>
56
  3. **Word:** Word Length and Predictivity<br>
57
  4. **Word:** Word Meaning Priming<br>
58
  5. **Syntax:** Structural Priming<br>