Spaces:

hallucinations-leaderboard
/

leaderboard

Running on CPU Upgrade

App Files Community

pingnieuk committed on Dec 23, 2023

Commit

d3d434b

•

1 Parent(s): 3d1d33c

Update src/backend/tasks/selfcheckgpt/task.py

Browse files

Fix typos and default task metric in selfcheckgpt.

Files changed (1) hide show

src/backend/tasks/selfcheckgpt/task.py +14 -7

src/backend/tasks/selfcheckgpt/task.py CHANGED Viewed

@@ -17,14 +17,14 @@ class SelfCheckGpt(Task):
     VERSION = 0.0
     DATASET_PATH = "potsawee/wiki_bio_gpt3_hallucination"
     DATASET_NAME = None
     def __init__(self, data_dir=None, cache_dir=None, download_mode=None, config=None):
         super().__init__(data_dir=data_dir, cache_dir=cache_dir, download_mode=download_mode, config=config)
         self.generation_kwargs = {"temperature": 0.0, "do_sample": False}
         self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistence
         self.generation_kwargs_sampling = {"temperature": 1.0, "do_sample": False}
-        self.selfcheckgpt_type = os.environ.get('SELFCHECKGPTTYPE', 'SelfCheckNgram')
         self.selfcheckgpt_device = os.environ.get('SELFCHECKGPTDEVICE', DEVICE)
         self.selfcheckgpt_nlp = spacy.load("en_core_web_sm")
@@ -92,12 +92,19 @@ class SelfCheckGpt(Task):
         elif self.selfcheckgpt_type == 'SelfCheckBERTScore':
             selfcheckgpt_scores = self.selfcheckgpt.predict(sentences=sentences, sampled_passages=other_responses)
         elif self.selfcheckgpt_type == 'SelfCheckMQAG':
-            selfcheckgpt_scores = self.selfcheckgpt.predict(sentences=sentences, sampled_passages=other_responses)
         elif self.selfcheckgpt_type == 'SelfCheckNLI':
-            selfcheckgpt_scores = self.selfcheckgpt.predict(sentences=sentences, passage=response_temperature_0, sampled_passages=other_responses,
-                                                            num_questions_per_sent=5,           # number of questions to be drawn
-                                                            scoring_method='bayes_with_alpha',  # options = 'counting', 'bayes', 'bayes_with_alpha'
-                                                            beta1=0.8, beta2=0.8)               # additional params depending on scoring_method
         selfcheckgpt_scores_avg = sum(selfcheckgpt_scores) / len(selfcheckgpt_scores) if len(selfcheckgpt_scores) > 0 else 0
         selfcheckgpt_scores_max = max(selfcheckgpt_scores)

     VERSION = 0.0
     DATASET_PATH = "potsawee/wiki_bio_gpt3_hallucination"
     DATASET_NAME = None
+    OUTPUT_TYPE = 'generate_until'
     def __init__(self, data_dir=None, cache_dir=None, download_mode=None, config=None):
         super().__init__(data_dir=data_dir, cache_dir=cache_dir, download_mode=download_mode, config=config)
         self.generation_kwargs = {"temperature": 0.0, "do_sample": False}
         self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistence
         self.generation_kwargs_sampling = {"temperature": 1.0, "do_sample": False}
+        self.selfcheckgpt_type = os.environ.get('SELFCHECKGPTTYPE', 'SelfCheckNLI')
         self.selfcheckgpt_device = os.environ.get('SELFCHECKGPTDEVICE', DEVICE)
         self.selfcheckgpt_nlp = spacy.load("en_core_web_sm")
         elif self.selfcheckgpt_type == 'SelfCheckBERTScore':
             selfcheckgpt_scores = self.selfcheckgpt.predict(sentences=sentences, sampled_passages=other_responses)
         elif self.selfcheckgpt_type == 'SelfCheckMQAG':
+            selfcheckgpt_scores = self.selfcheckgpt.predict(
+                sentences = sentences,
+                passage = response_temperature_0,
+                sampled_passages = other_responses,
+                num_questions_per_sent = 5,          # number of questions to be drawn
+                scoring_method = 'bayes_with_alpha', # options = 'counting', 'bayes', 'bayes_with_alpha'
+                beta1 = 0.8, beta2 = 0.8,            # additional params depending on scoring_method
+                )
         elif self.selfcheckgpt_type == 'SelfCheckNLI':
+            selfcheckgpt_scores = self.selfcheckgpt.predict(
+                sentences = sentences,
+                sampled_passages = other_responses,
+                )
         selfcheckgpt_scores_avg = sum(selfcheckgpt_scores) / len(selfcheckgpt_scores) if len(selfcheckgpt_scores) > 0 else 0
         selfcheckgpt_scores_max = max(selfcheckgpt_scores)