dbalasub committed on
Commit 88c6e75
1 Parent(s): 2a573b2

Update test.py

Files changed (1)
  1. test.py +129 -120
test.py CHANGED
@@ -1,121 +1,130 @@
- from typing import List, Tuple
- import numpy as np
- import pandas as pd
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer, ElectraModel, ElectraForCausalLM, GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
- import torch
- import os
- import json
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
- import gzip
- from transformers import Text2TextGenerationPipeline
-
- class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
-     def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
-         super().__init__(model=model, tokenizer=tokenizer, framework=framework)
-         self.quiz_bowl_model = QuizBowlModel()  # Initializes your QuizBowl model
-
-     def _forward(self, model_inputs, **generate_kwargs):
-         questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in model_inputs["input_ids"]]
-         results = self.quiz_bowl_model.guess_and_buzz(questions)
-         return results
-
-     def postprocess(self, model_outputs):
-         results = {}
-         for output in model_outputs:
-             guess_text = output[0]
-             confidence = output[1]
-             results = {'guess': guess_text, 'confidence': confidence}
-         return results
-
- # # From class eval.py code
- # def normalize_answer(answer):
- #     """
- #     Remove superfluous components to create a normalized form of an answer that
- #     can be more easily compared.
- #     """
- #     from unidecode import unidecode
-
- #     if answer is None:
- #         return ''
- #     reduced = unidecode(answer)
- #     reduced = reduced.replace("_", " ")
- #     if "(" in reduced:
- #         reduced = reduced.split("(")[0]
- #     reduced = "".join(x for x in reduced.lower() if x not in string.punctuation)
- #     reduced = reduced.strip()
-
- #     for bad_start in ["the ", "a ", "an "]:
- #         if reduced.startswith(bad_start):
- #             reduced = reduced[len(bad_start):]
- #     return reduced.strip()
-
- class QuizBowlModel:
-     def __init__(self):
-         model_configs = {
-             'flan-t5-large': {'model': 'google/flan-t5-large', 'tokenizer': 'google/flan-t5-large'},
-             'flan-t5-small': {'model': 'google/flan-t5-small', 'tokenizer': 'google/flan-t5-small'},
-             'flan-t5-base': {'model': 'google/flan-t5-base', 'tokenizer': 'google/flan-t5-base'},
-             'flan-t5-finetuned': {'model': 'dbalasub/finetuned-t5-qanta', 'tokenizer': 'dbalasub/finetuned-t5-qanta'},
-             'flan-t5-adv-finetuned': {'model': 'dbalasub/finetuned-t5-adv-qanta', 'tokenizer': 'dbalasub/finetuned-t5-adv-qanta'}
-         }
-         self.models = {}
-         self.tokenizers = {}
-         self.load_models(model_configs)
-
-     def load_models(self, model_configs):
-         """Load multiple models based on configuration."""
-         for model_name, config in model_configs.items():
-             tokenizer = AutoTokenizer.from_pretrained(config['tokenizer'])
-             model = AutoModelForSeq2SeqLM.from_pretrained(config['model'])
-             model.eval()
-             self.models[model_name] = model
-             self.tokenizers[model_name] = tokenizer
-
-     def guess_and_buzz(self, question_texts):
-         total_answers = [self.generate_answers(question) for question in question_texts]
-         # here to check all models' responses if needed
-         # for question, model_answers in zip(question_texts, total_answers):
-         #     print(f"{question}\nModel Guesses: {model_answers}\n")
-         return [self.ensemble_tfidf_voting(answers) for answers in total_answers]
-
-     def generate_answers(self, question):
-         raw_answers = []
-         for model_name, model in self.models.items():
-             tokenizer = self.tokenizers[model_name]
-             input_ids = tokenizer(question, return_tensors="pt", padding=True, truncation=True).input_ids
-             with torch.no_grad():
-                 outputs = model.generate(input_ids, max_new_tokens=5, output_scores=True, return_dict_in_generate=True)
-             decoded_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
-             confidence_score = self.calculate_confidence(outputs.scores)
-             raw_answers.append((decoded_text, confidence_score))
-
-         total_scores = sum([score for _, score in raw_answers])
-         answers = [(text, score / total_scores if total_scores > 0 else 0) for text, score in raw_answers]
-
-         return answers
-
-     def calculate_confidence(self, scores):
-         if scores:
-             log_probs = [torch.nn.functional.log_softmax(score, dim=-1) for score in scores]
-             selected_log_probs = [log_probs[i][0, scores[i].argmax()].item() for i in range(len(log_probs))]
-             confidence_score = np.exp(np.mean(selected_log_probs))
-         else:
-             confidence_score = None
-         return confidence_score
-
-     def ensemble_tfidf_voting(self, answers):
-         return max(answers, key=lambda x: x[1]) if answers else (None, 0)
- # from transformers.pipelines import Pipeline, PIPELINE_REGISTRY
- # from transformers import AutoModelForSeq2SeqLM, TFAutoModelForSeq2SeqLM
- # from test_ensemble import TestEnsembleQAPipeline
- # from transformers import pipeline
-
- # # Register your custom pipeline for PyTorch and TensorFlow models
- # PIPELINE_REGISTRY.register_pipeline("test-qa",
- #                                     pipeline_class=TestEnsembleQAPipeline,
- #                                     pt_model=AutoModelForSeq2SeqLM,
- #                                     tf_model=TFAutoModelForSeq2SeqLM)
- # qa_pipe = pipeline("test-qa", model="google/flan-t5-small", tokenizer="google/flan-t5-small")
-
+ from typing import List, Tuple
+ import numpy as np
+ import pandas as pd
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer, ElectraModel, ElectraForCausalLM, GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
+ import torch
+ import os
+ import json
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import gzip
+ from transformers import Text2TextGenerationPipeline
+ """
+ Works cited:
+ 'flan-t5-large': https://huggingface.co/google/flan-t5-large,
+ 'flan-t5-small': https://huggingface.co/google/flan-t5-small,
+ 'flan-t5-base': https://huggingface.co/google/flan-t5-base,
+ 'flan-t5-finetuned': fine-tuned version of the base model on the qanta guesstrain dataset,
+ 'flan-t5-adv-finetuned': fine-tuned version of that fine-tuned model on the adversarial dataset in qanta
+ """
+ class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
+     def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
+         super().__init__(model=model, tokenizer=tokenizer, framework=framework)
+         self.quiz_bowl_model = QuizBowlModel()  # Initializes your QuizBowl model
+
+     def _forward(self, model_inputs, **generate_kwargs):
+         questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in model_inputs["input_ids"]]
+         results = self.quiz_bowl_model.guess_and_buzz(questions)
+         return results
+
+     def postprocess(self, model_outputs):
+         results = {}
+         for output in model_outputs:
+             guess_text = output[0]
+             confidence = output[1]
+             results = {'guess': guess_text, 'confidence': confidence}
+         return results
+
+ # # From class eval.py code
+ # def normalize_answer(answer):
+ #     """
+ #     Remove superfluous components to create a normalized form of an answer that
+ #     can be more easily compared.
+ #     """
+ #     from unidecode import unidecode
+
+ #     if answer is None:
+ #         return ''
+ #     reduced = unidecode(answer)
+ #     reduced = reduced.replace("_", " ")
+ #     if "(" in reduced:
+ #         reduced = reduced.split("(")[0]
+ #     reduced = "".join(x for x in reduced.lower() if x not in string.punctuation)
+ #     reduced = reduced.strip()
+
+ #     for bad_start in ["the ", "a ", "an "]:
+ #         if reduced.startswith(bad_start):
+ #             reduced = reduced[len(bad_start):]
+ #     return reduced.strip()
+ class QuizBowlModel:
+     def __init__(self):
+         model_configs = {
+             'flan-t5-large': {'model': 'google/flan-t5-large', 'tokenizer': 'google/flan-t5-large'},
+             'flan-t5-small': {'model': 'google/flan-t5-small', 'tokenizer': 'google/flan-t5-small'},
+             'flan-t5-base': {'model': 'google/flan-t5-base', 'tokenizer': 'google/flan-t5-base'},
+             'flan-t5-finetuned': {'model': 'dbalasub/finetuned-t5-qanta', 'tokenizer': 'dbalasub/finetuned-t5-qanta'},
+             'flan-t5-adv-finetuned': {'model': 'dbalasub/finetuned-t5-adv-qanta', 'tokenizer': 'dbalasub/finetuned-t5-adv-qanta'}
+         }
+         self.models = {}
+         self.tokenizers = {}
+         self.load_models(model_configs)
+
+     def load_models(self, model_configs):
+         """Load multiple models based on configuration."""
+         for model_name, config in model_configs.items():
+             tokenizer = AutoTokenizer.from_pretrained(config['tokenizer'])
+             model = AutoModelForSeq2SeqLM.from_pretrained(config['model'])
+             model.eval()
+             self.models[model_name] = model
+             self.tokenizers[model_name] = tokenizer
+
+     def guess_and_buzz(self, question_texts):
+         total_answers = [self.generate_answers(question) for question in question_texts]
+         # here to check all models' responses if needed
+         # for question, model_answers in zip(question_texts, total_answers):
+         #     print(f"{question}\nModel Guesses: {model_answers}\n")
+         return [self.ensemble_tfidf_voting(answers) for answers in total_answers]
+
+     def generate_answers(self, question):
+         raw_answers = []
+         for model_name, model in self.models.items():
+             tokenizer = self.tokenizers[model_name]
+             input_ids = tokenizer(question, return_tensors="pt", padding=True, truncation=False).input_ids
+             with torch.no_grad():
+                 outputs = model.generate(input_ids, max_new_tokens=5, output_scores=True, return_dict_in_generate=True)
+             decoded_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
+             confidence_score = self.calculate_confidence(outputs.scores)
+             raw_answers.append((decoded_text, confidence_score))
+
+         # normalization if needed
+         # total_scores = sum([score for _, score in raw_answers])
+         # answers = [(text, score / total_scores if total_scores > 0 else 0) for text, score in raw_answers]
+
+         return raw_answers
+
+     def calculate_confidence(self, scores):
+         if scores:
+             log_probs = [torch.nn.functional.log_softmax(score, dim=-1) for score in scores]
+             selected_log_probs = [log_probs[i][0, scores[i].argmax()].item() for i in range(len(log_probs))]
+             confidence_score = np.exp(np.mean(selected_log_probs))
+         else:
+             confidence_score = None
+         return confidence_score
+
+     def ensemble_tfidf_voting(self, all_answers):
+         return max(all_answers, key=lambda x: x[1]) if all_answers else (None, 0)
+
+
+ # from transformers.pipelines import Pipeline, PIPELINE_REGISTRY
+ # from transformers import AutoModelForSeq2SeqLM, TFAutoModelForSeq2SeqLM
+ # from test_ensemble import TestEnsembleQAPipeline
+ # from transformers import pipeline
+
+ # # Register your custom pipeline for PyTorch and TensorFlow models
+ # PIPELINE_REGISTRY.register_pipeline("test-qa",
+ #                                     pipeline_class=TestEnsembleQAPipeline,
+ #                                     pt_model=AutoModelForSeq2SeqLM,
+ #                                     tf_model=TFAutoModelForSeq2SeqLM)
+ # qa_pipe = pipeline("test-qa", model="google/flan-t5-small", tokenizer="google/flan-t5-small")
+
  # qa_pipe.push_to_hub("test-qa")
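
For reviewers who want to exercise the updated file directly, here is a minimal usage sketch that bypasses the commented-out pipeline registration. It is not part of the commit: it assumes test.py is importable as a module named `test`, that the five checkpoints listed in model_configs are reachable on the Hub, and the example question is invented for illustration.

# Minimal usage sketch (assumption: test.py is on the import path as `test`).
# Constructing QuizBowlModel downloads and loads all five flan-t5 checkpoints,
# which is slow and memory-heavy on first run.
from test import QuizBowlModel

model = QuizBowlModel()

# guess_and_buzz takes a list of question strings and returns one
# (guess_text, confidence) tuple per question: the single highest-confidence
# guess across the five models, selected by ensemble_tfidf_voting.
questions = ["This physicist developed the theory of general relativity."]  # illustrative
for guess, confidence in model.guess_and_buzz(questions):
    print(f"guess={guess!r} confidence={confidence:.3f}")

Note that after this commit the per-model scores are no longer normalized across the ensemble (that step is commented out in generate_answers). Each confidence is what calculate_confidence returns: exp of the mean selected log-probability over the generated steps, i.e. roughly the geometric mean of the per-step top-token probabilities, so each value lies in (0, 1] on its own rather than summing to 1 across models.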