andrewrreed HF staff committed on
Commit
6fbf558
1 Parent(s): 281ba52

add new categories

Browse files
Files changed (1) hide show
  1. utils.py +9 -0
utils.py CHANGED
@@ -6,9 +6,13 @@ from typing import Literal, List
6
  import pandas as pd
7
  from huggingface_hub import HfFileSystem, hf_hub_download
8
 
 
9
  KEY_TO_CATEGORY_NAME = {
10
  "full": "Overall",
 
11
  "coding": "Coding",
 
 
12
  "long_user": "Longer Query",
13
  "english": "English",
14
  "chinese": "Chinese",
@@ -16,10 +20,14 @@ KEY_TO_CATEGORY_NAME = {
16
  "no_tie": "Exclude Ties",
17
  "no_short": "Exclude Short Query (< 5 tokens)",
18
  "no_refusal": "Exclude Refusal",
 
19
  }
20
  CAT_NAME_TO_EXPLANATION = {
21
  "Overall": "Overall Questions",
 
22
  "Coding": "Coding: whether conversation contains code snippets",
 
 
23
  "Longer Query": "Longer Query (>= 500 tokens)",
24
  "English": "English Prompts",
25
  "Chinese": "Chinese Prompts",
@@ -27,6 +35,7 @@ CAT_NAME_TO_EXPLANATION = {
27
  "Exclude Ties": "Exclude Ties and Bothbad",
28
  "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
29
  "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
 
30
  }
31
 
32
  PROPRIETARY_LICENSES = ["Proprietary", "Proprietory"]
 
6
  import pandas as pd
7
  from huggingface_hub import HfFileSystem, hf_hub_download
8
 
9
+ # from: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/monitor/monitor.py#L389
10
  KEY_TO_CATEGORY_NAME = {
11
  "full": "Overall",
12
+ "dedup": "De-duplicate Top Redundant Queries (soon to be default)",
13
  "coding": "Coding",
14
+ "hard_6": "Hard Prompts (Overall)",
15
+ "hard_english_6": "Hard Prompts (English)",
16
  "long_user": "Longer Query",
17
  "english": "English",
18
  "chinese": "Chinese",
 
20
  "no_tie": "Exclude Ties",
21
  "no_short": "Exclude Short Query (< 5 tokens)",
22
  "no_refusal": "Exclude Refusal",
23
+ "overall_limit_5_user_vote": "overall_limit_5_user_vote",
24
  }
25
  CAT_NAME_TO_EXPLANATION = {
26
  "Overall": "Overall Questions",
27
+ "De-duplicate Top Redundant Queries (soon to be default)": "De-duplicate top redundant queries (top 0.1%). See details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/#note-enhancing-quality-through-de-duplication).",
28
  "Coding": "Coding: whether conversation contains code snippets",
29
+ "Hard Prompts (Overall)": "Hard Prompts (Overall): details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
30
+ "Hard Prompts (English)": "Hard Prompts (English), note: the delta is to English Category. details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
31
  "Longer Query": "Longer Query (>= 500 tokens)",
32
  "English": "English Prompts",
33
  "Chinese": "Chinese Prompts",
 
35
  "Exclude Ties": "Exclude Ties and Bothbad",
36
  "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
37
  "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
38
+ "overall_limit_5_user_vote": "overall_limit_5_user_vote",
39
  }
40
 
41
  PROPRIETARY_LICENSES = ["Proprietary", "Proprietory"]