Commit: Upload TestEnsembleQAPipeline
Files changed: config.json (+1 −1), test_ensemble.py (+37 −13)
config.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
"classifier_dropout": 0.0,
|
7 |
"custom_pipelines": {
|
8 |
"test-qa": {
|
9 |
-
"impl": "
|
10 |
"pt": [
|
11 |
"AutoModelForSeq2SeqLM"
|
12 |
],
|
|
|
6 |
"classifier_dropout": 0.0,
|
7 |
"custom_pipelines": {
|
8 |
"test-qa": {
|
9 |
+
"impl": "test_ensemble.TestEnsembleQAPipeline",
|
10 |
"pt": [
|
11 |
"AutoModelForSeq2SeqLM"
|
12 |
],
|
test_ensemble.py
CHANGED
@@ -9,25 +9,49 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
import gzip
|
11 |
from transformers import Text2TextGenerationPipeline
|
|
|
|
|
|
|
12 |
|
13 |
class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
|
14 |
-
def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
|
15 |
super().__init__(model=model, tokenizer=tokenizer, framework=framework)
|
16 |
-
self.quiz_bowl_model = QuizBowlModel() #
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def _forward(self, model_inputs, **generate_kwargs):
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
def postprocess(self, model_outputs):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
29 |
return results
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
class QuizBowlModel:
|
32 |
def __init__(self):
|
33 |
self.load_models()
|
@@ -55,10 +79,10 @@ class QuizBowlModel:
|
|
55 |
"""Generate answers from all models for given questions"""
|
56 |
total_answers = self.generate_answers(question_texts)
|
57 |
# Display the model's guesses before voting
|
58 |
-
print("Answers Before Voting Mechanism:")
|
59 |
|
60 |
-
for question, model_answers in zip(question_texts, total_answers):
|
61 |
-
|
62 |
return self.ensemble_tfidf_voting(total_answers)
|
63 |
|
64 |
def generate_answers(self, question_texts):
|
|
|
9 |
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
import gzip
|
11 |
from transformers import Text2TextGenerationPipeline
|
12 |
+
from transformers import Text2TextGenerationPipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
13 |
+
import torch
|
14 |
+
import numpy as np
|
15 |
|
16 |
class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
    """Custom text2text-generation pipeline wrapping a QuizBowl ensemble.

    Registered in config.json under ``custom_pipelines`` as the ``"test-qa"``
    task with ``"impl": "test_ensemble.TestEnsembleQAPipeline"``.
    """

    def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
        super().__init__(model=model, tokenizer=tokenizer, framework=framework)
        # Ensemble helper defined later in this module; used for voting-based answers.
        self.quiz_bowl_model = QuizBowlModel()

    def preprocess(self, text, **kwargs):
        """Prepend the QA instruction prompt and tokenize the question."""
        prompt = "Please provide a concise answer to the following question:"
        input_text = f"{prompt} {text}"
        return self.tokenizer(
            input_text, return_tensors=self.framework, padding=True, truncation=True
        )

    def _forward(self, model_inputs, **generate_kwargs):
        """Generate sequences plus per-step scores (needed for confidence).

        Raises NotImplementedError for any framework other than PyTorch.
        """
        if self.framework == "pt":
            return self.model.generate(
                **model_inputs,
                **generate_kwargs,
                return_dict_in_generate=True,
                output_scores=True,
            )
        raise NotImplementedError("TensorFlow framework is not supported in this pipeline.")

    def postprocess(self, model_outputs):
        """Decode each generated sequence and attach a confidence score.

        Returns a list of ``{'guess': str, 'confidence': float | None}`` dicts.
        """
        results = []
        for output in model_outputs.sequences:
            decoded_text = self.tokenizer.decode(output, skip_special_tokens=True)
            # BUG FIX: pass the generated token sequence along with the scores
            # so per-step logits can be aligned with the sampled token ids.
            scores = self.calculate_confidence(model_outputs.scores, output)
            results.append({'guess': decoded_text, 'confidence': scores})
        return results

    def calculate_confidence(self, scores, output=None):
        """Return the geometric-mean token probability of a generated sequence.

        Args:
            scores: tuple of per-step logit tensors from
                ``generate(..., output_scores=True)`` (one ``(batch, vocab)``
                tensor per generated token).
            output: the generated token-id sequence the scores belong to.
                Defaults to None for backward compatibility; the original
                signature took only ``scores`` (and then crashed — see below).

        Returns:
            float confidence in (0, 1], or None when scores/sequence are
            missing or empty.
        """
        # BUG FIX: the original body referenced `output` without it being in
        # scope (it was postprocess's loop variable), so every call raised
        # NameError. It is now an explicit parameter.
        if not scores or output is None:
            return None
        log_probs = [torch.nn.functional.log_softmax(score, dim=-1) for score in scores]
        # scores[i] corresponds to output[i + 1]: sequences begin with the
        # decoder start token, which has no score. Guard with min() so a
        # length mismatch cannot raise IndexError.
        steps = min(len(log_probs), len(output) - 1)
        avg_scores = [log_probs[i][0, output[i + 1]].item() for i in range(steps)]
        if not avg_scores:
            return None
        return np.exp(np.mean(avg_scores))
55 |
class QuizBowlModel:
|
56 |
def __init__(self):
|
57 |
self.load_models()
|
|
|
79 |
"""Generate answers from all models for given questions"""
|
80 |
total_answers = self.generate_answers(question_texts)
|
81 |
# Display the model's guesses before voting
|
82 |
+
# print("Answers Before Voting Mechanism:")
|
83 |
|
84 |
+
# for question, model_answers in zip(question_texts, total_answers):
|
85 |
+
# print(f"{question}\nModel Guesses: {model_answers}\n\n")
|
86 |
return self.ensemble_tfidf_voting(total_answers)
|
87 |
|
88 |
def generate_answers(self, question_texts):
|