dbalasub committed
Commit e0a5b35
1 Parent(s): b2f7e08

Upload TestEnsembleQAPipeline

Files changed (2)
  1. config.json +1 -1
  2. test_ensemble.py +37 -13
config.json CHANGED
@@ -6,7 +6,7 @@
   "classifier_dropout": 0.0,
   "custom_pipelines": {
     "test-qa": {
-      "impl": "__main__.TestEnsembleQAPipeline",
+      "impl": "test_ensemble.TestEnsembleQAPipeline",
       "pt": [
         "AutoModelForSeq2SeqLM"
       ],
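
The impl change is what lets the custom pipeline load outside the upload script: registering the class from a script run as __main__ records "__main__.TestEnsembleQAPipeline" in config.json, a module path that cannot be imported once the repo is downloaded, whereas "test_ensemble.TestEnsembleQAPipeline" resolves to the test_ensemble.py file shipped alongside the config. A minimal loading sketch, assuming a placeholder repo id (the actual repo id is not shown in this commit):

    # Minimal sketch of loading the custom "test-qa" pipeline from the Hub.
    # "dbalasub/ensemble-qa" is a placeholder repo id, not taken from this commit.
    from transformers import pipeline

    qa_pipe = pipeline(
        model="dbalasub/ensemble-qa",  # placeholder: replace with the real repo id
        trust_remote_code=True,        # required so test_ensemble.py from the repo is imported
    )
    print(qa_pipe("Which element has the atomic number 6?"))

The task name and its implementation module are read from the custom_pipelines block above, so no explicit task argument is needed.
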
test_ensemble.py CHANGED
@@ -9,25 +9,49 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import gzip
 from transformers import Text2TextGenerationPipeline
+from transformers import Text2TextGenerationPipeline, AutoModelForSeq2SeqLM, AutoTokenizer
+import torch
+import numpy as np
 
 class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
-    def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs): # Add **kwargs to accept additional unused arguments
+    def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
         super().__init__(model=model, tokenizer=tokenizer, framework=framework)
-        self.quiz_bowl_model = QuizBowlModel() # Initializes your QuizBowl model
+        self.quiz_bowl_model = QuizBowlModel() # Initialize your QuizBowl model
+
+    def preprocess(self, text, **kwargs):
+        """Prepare the text inputs for processing."""
+        prompt = "Please provide a concise answer to the following question:"
+        input_text = f"{prompt} {text}"
+        return self.tokenizer(input_text, return_tensors=self.framework, padding=True, truncation=True)
 
     def _forward(self, model_inputs, **generate_kwargs):
-        questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in model_inputs["input_ids"]]
-        results = self.quiz_bowl_model.guess_and_buzz(questions)
-        return results
+        """Forward pass to generate outputs from the model."""
+        if self.framework == "pt":
+            model_outputs = self.model.generate(**model_inputs, **generate_kwargs, return_dict_in_generate=True, output_scores=True)
+        else:
+            raise NotImplementedError("TensorFlow framework is not supported in this pipeline.")
+        return model_outputs
 
     def postprocess(self, model_outputs):
-        results = {}
-        for output in model_outputs:
-            guess_text = output['guess']
-            confidence = output['confidence']
-            results = {'guess': guess_text, 'confidence': confidence}
+        """Process model outputs to extract answers and confidence scores."""
+        results = []
+        for output in model_outputs.sequences:
+            decoded_text = self.tokenizer.decode(output, skip_special_tokens=True)
+            scores = self.calculate_confidence(model_outputs.scores)
+            results.append({'guess': decoded_text, 'confidence': scores})
         return results
 
+    def calculate_confidence(self, scores):
+        """Calculate confidence from the model's score outputs."""
+        if scores:
+            log_probs = [torch.nn.functional.log_softmax(score, dim=-1) for score in scores]
+            avg_scores = [log_probs[i][0, output[i + 1]].item() for i in range(len(output) - 1)]
+            confidence_score = np.exp(np.mean(avg_scores))
+        else:
+            confidence_score = None
+        return confidence_score
+
+
 class QuizBowlModel:
     def __init__(self):
         self.load_models()
@@ -55,10 +79,10 @@ class QuizBowlModel:
         """Generate answers from all models for given questions"""
         total_answers = self.generate_answers(question_texts)
         # Display the model's guesses before voting
-        print("Answers Before Voting Mechanism:")
+        # print("Answers Before Voting Mechanism:")
 
-        for question, model_answers in zip(question_texts, total_answers):
-            print(f"{question}\nModel Guesses: {model_answers}\n\n")
+        # for question, model_answers in zip(question_texts, total_answers):
+        #     print(f"{question}\nModel Guesses: {model_answers}\n\n")
         return self.ensemble_tfidf_voting(total_answers)
 
     def generate_answers(self, question_texts):
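
One caveat in the new calculate_confidence: it references output, which is only defined as the loop variable inside postprocess, so the confidence branch would raise a NameError as committed, and it reuses the batch-level model_outputs.scores for every sequence. A possible per-sequence fix is sketched below, assuming scores come from generate(..., return_dict_in_generate=True, output_scores=True) and each sequence begins with the decoder start token; the helper name sequence_confidence is illustrative, not from the commit.

    import numpy as np
    import torch

    def sequence_confidence(sequence, scores, batch_index):
        """Mean log-probability of one generated sequence, exponentiated to a 0-1 score.

        sequence is one row of model_outputs.sequences, scores is the tuple
        model_outputs.scores, and batch_index picks the matching row of each
        per-step score matrix of shape (batch_size, vocab_size).
        """
        if not scores or len(sequence) < 2:
            return None
        token_log_probs = []
        # sequence[0] is the decoder start token, so the token produced at step
        # `step` is sequence[step + 1]; padding tokens are included for simplicity.
        num_steps = min(len(scores), len(sequence) - 1)
        for step in range(num_steps):
            log_probs = torch.nn.functional.log_softmax(scores[step], dim=-1)
            token_log_probs.append(log_probs[batch_index, sequence[step + 1]].item())
        return float(np.exp(np.mean(token_log_probs)))

postprocess could then call sequence_confidence(output, model_outputs.scores, i) inside for i, output in enumerate(model_outputs.sequences); recent transformers releases also provide model.compute_transition_scores, which can replace the manual log-softmax loop.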