pminervini committed on
Commit
db18e72
2 Parent(s): 942ae30 17800c0

Merge branch 'main' of https://huggingface.co/spaces/pminervini/hallucinations-leaderboard into main

Browse files
src/backend/tasks/selfcheckgpt/task.py CHANGED
@@ -36,7 +36,8 @@ class SelfCheckGpt(Task):
36
  self.selfcheckgpt = SelfCheckMQAG(device=self.selfcheckgpt_device)
37
  elif self.selfcheckgpt_type == 'SelfCheckNLI':
38
  self.selfcheckgpt = SelfCheckNLI(device=self.selfcheckgpt_device)
39
-
 
40
  def has_training_docs(self):
41
  return False
42
 
@@ -105,6 +106,20 @@ class SelfCheckGpt(Task):
105
  sentences = sentences,
106
  sampled_passages = other_responses,
107
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  selfcheckgpt_scores_avg = sum(selfcheckgpt_scores) / len(selfcheckgpt_scores) if len(selfcheckgpt_scores) > 0 else 0
110
  selfcheckgpt_scores_max = max(selfcheckgpt_scores)
 
36
  self.selfcheckgpt = SelfCheckMQAG(device=self.selfcheckgpt_device)
37
  elif self.selfcheckgpt_type == 'SelfCheckNLI':
38
  self.selfcheckgpt = SelfCheckNLI(device=self.selfcheckgpt_device)
39
+ self.SelfCheckNLI_error_cnt = 0
40
+
41
  def has_training_docs(self):
42
  return False
43
 
 
106
  sentences = sentences,
107
  sampled_passages = other_responses,
108
  )
109
+
110
+ if len(selfcheckgpt_scores) == 0:
111
+ self.SelfCheckNLI_error_cnt += 1
112
+ print(f"SelfCheckNLI Warning.SelfCheckNLI_error_cnt:{self.SelfCheckNLI_error_cnt}. This instance is marked as hallucinated with 1.0.")
113
+ result = {'avg-selfcheckgpt': 1.0, 'max-selfcheckgpt': 1.0}
114
+
115
+ else:
116
+ threshold = 0.5
117
+ # The passage is hallucinated if any one sentence is hallucinated. This is very strict.
118
+ selfcheckgpt_scores_max = 1.0 if max(selfcheckgpt_scores) > threshold else 0.0
119
+ # The passage is hallucinated if the average score over all sentences exceeds the threshold.
120
+ selfcheckgpt_scores_avg = 1.0 if sum(selfcheckgpt_scores) / len(selfcheckgpt_scores) > threshold else 0.0
121
+ result = {'avg-selfcheckgpt': selfcheckgpt_scores_avg, 'max-selfcheckgpt': selfcheckgpt_scores_max}
122
+ return result
123
 
124
  selfcheckgpt_scores_avg = sum(selfcheckgpt_scores) / len(selfcheckgpt_scores) if len(selfcheckgpt_scores) > 0 else 0
125
  selfcheckgpt_scores_max = max(selfcheckgpt_scores)