natolambert committed on
Commit
bb95637
1 Parent(s): 61c1fca
Files changed (1) hide show
  1. src/constants.py +3 -2
src/constants.py CHANGED
@@ -31,7 +31,7 @@ example_counts = {
31
  "mt-bench-easy": 28,
32
  "mt-bench-med": 40,
33
  "mt-bench-hard": 37,
34
- "math-prm": 984, # actual length 447, upweighting to be equal to code
35
  "refusals-dangerous": 100,
36
  "refusals-offensive": 100,
37
  "llmbar-natural": 100,
@@ -54,5 +54,6 @@ subset_mapping = {
54
  "Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
55
  "Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
56
  "Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
57
- "Reasoning": ["math-prm", "hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
 
58
  }
 
31
  "mt-bench-easy": 28,
32
  "mt-bench-med": 40,
33
  "mt-bench-hard": 37,
34
+ # "math-prm": 984, # actual length 447, upweighting to be equal to code
35
  "refusals-dangerous": 100,
36
  "refusals-offensive": 100,
37
  "llmbar-natural": 100,
 
54
  "Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
55
  "Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
56
  "Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
57
+ "Reasoning": [#"math-prm",
58
+ "hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
59
  }