danielz02 committed
Commit: a17a7f2
Parent: 0d80418

Shorten perspective display name
src/display/about.py CHANGED
@@ -13,12 +13,12 @@ class Task:
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("toxicity", "aggregated-results", "Toxicity")
-    task1 = Task("stereotype", "aggregated-results", "Stereotype Bias")
-    task2 = Task("adv", "aggregated-results", "Adversarial Robustness")
-    task3 = Task("ood", "aggregated-results", "OoD Robustness")
-    task4 = Task("adv_demo", "aggregated-results", "Robustness to Adversarial Demonstrations")
+    task1 = Task("stereotype", "aggregated-results", "Stereotype")
+    task2 = Task("adv", "aggregated-results", "Adv Robustness")
+    task3 = Task("ood", "aggregated-results", "OoD")
+    task4 = Task("adv_demo", "aggregated-results", "Adv Demo")
     task5 = Task("privacy", "aggregated-results", "Privacy")
-    task6 = Task("ethics", "aggregated-results", "Machine Ethics")
+    task6 = Task("ethics", "aggregated-results", "Ethics")
     task7 = Task("fairness", "aggregated-results", "Fairness")


@@ -41,14 +41,14 @@ limitations, and potential risks involved in deploying these state-of-the-art La

 This project is organized around the following eight primary perspectives of trustworthiness, including:

-Toxicity
-Stereotype and bias
-Adversarial robustness
-Out-of-Distribution Robustness
-Privacy
-Robustness to Adversarial Demonstrations
-Machine Ethics
-Fairness
++ Toxicity
++ Stereotype and bias
++ Adversarial robustness
++ Out-of-Distribution Robustness
++ Privacy
++ Robustness to Adversarial Demonstrations
++ Machine Ethics
++ Fairness

 ## Reproducibility
 To reproduce our results, checkout https://github.com/AI-secure/DecodingTrust
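For context, here is a minimal runnable sketch of the definitions the first hunk edits, assuming the Task dataclass follows the standard Hugging Face leaderboard template. The field names benchmark, metric, and col_name are assumptions; only the three positional values and the comment describing them appear in the file.

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task_key in the results json, e.g. "toxicity" (name assumed)
    metric: str     # metric_key in the results json, e.g. "aggregated-results" (name assumed)
    col_name: str   # name displayed in the leaderboard column header (name assumed)


class Tasks(Enum):
    # Display names after this commit: shortened so the column headers stay compact.
    task0 = Task("toxicity", "aggregated-results", "Toxicity")
    task1 = Task("stereotype", "aggregated-results", "Stereotype")
    task2 = Task("adv", "aggregated-results", "Adv Robustness")
    task3 = Task("ood", "aggregated-results", "OoD")
    task4 = Task("adv_demo", "aggregated-results", "Adv Demo")
    task5 = Task("privacy", "aggregated-results", "Privacy")
    task6 = Task("ethics", "aggregated-results", "Ethics")
    task7 = Task("fairness", "aggregated-results", "Fairness")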
src/leaderboard/read_evals.py CHANGED
@@ -37,7 +37,6 @@ class EvalResult:
             data = json.load(fp)

         config = data.get("config")
-        print(config)

         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
@@ -97,6 +96,7 @@ class EvalResult:
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
+            print("Read Request", request)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
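To see where the new debug print sits, a minimal self-contained sketch of the touched logic. The function name read_request_file is hypothetical; in the repo these lines live inside EvalResult, and the ModelType/WeightType enum lookups are replaced here with the raw strings from the request json.

import json


def read_request_file(request_file: str) -> dict:
    # Hypothetical standalone version of the lines this hunk touches.
    try:
        with open(request_file, "r") as f:
            request = json.load(f)
        print("Read Request", request)  # debug print added by this commit
        return {
            "model_type": request.get("model_type", ""),
            "weight_type": request.get("weight_type", "Original"),
            "license": request.get("license", "?"),
        }
    except FileNotFoundError:
        # Mirrors the defensive try/except around the file read.
        return {}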