|
from typing import List, Tuple
|
|
import numpy as np
|
|
import pandas as pd
|
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer, ElectraModel, ElectraForCausalLM, GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
|
|
import torch
|
|
import os
|
|
import json
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
import gzip
|
|
from transformers import Text2TextGenerationPipeline
|
|
|
|
class TestEnsembleQAPipeline(Text2TextGenerationPipeline):
    """Hugging Face text2text pipeline wrapper that answers questions via the
    QuizBowlModel ensemble instead of the wrapped model's own generate()."""

    def __init__(self, model=None, tokenizer=None, framework="pt", **kwargs):
        # model/tokenizer satisfy the Pipeline interface; the actual answers
        # come from the QuizBowlModel ensemble. **kwargs is accepted but unused.
        super().__init__(model=model, tokenizer=tokenizer, framework=framework)
        # Eagerly loads the FLAN-T5 ensemble (downloads models on first use).
        self.quiz_bowl_model = QuizBowlModel()

    def _forward(self, model_inputs, **generate_kwargs):
        # Decode the tokenized questions back into raw text, then delegate to
        # the ensemble. Returns an iterable of {'guess', 'confidence'} dicts.
        questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in model_inputs["input_ids"]]
        results = self.quiz_bowl_model.guess_and_buzz(questions)
        return results

    def postprocess(self, model_outputs):
        # NOTE(review): `results` is reassigned on every iteration, so only the
        # LAST guess/confidence pair is returned. This is fine if the pipeline
        # machinery calls postprocess once per question, but silently drops
        # answers for multi-question batches — confirm against the transformers
        # Pipeline batching behavior; collect a list here otherwise.
        results = {}
        for output in model_outputs:
            guess_text = output['guess']
            confidence = output['confidence']
            results = {'guess': guess_text, 'confidence': confidence}
        return results
|
|
|
|
# Toy quiz-bowl fixtures keyed by split name:
#   "tiny", "train": lists of {"text": question, "page": gold Wikipedia answer page}
#   "dev": lists of {"text": question, "top": expected best guess,
#                    "second": expected runner-up guess}
kTOY_DATA = {"tiny": [{"text": "capital England", "page": "London"},
                      {"text": "capital Russia", "page": "Moscow"},
                      {"text": "currency England", "page": "Pound"},
                      {"text": "currency Russia", "page": "Rouble"}],
             "train": [{'page': 'Maine', 'text': 'For 10 points, name this New England state with capital at Augusta.'},
                       {'page': 'Massachusetts', 'text': 'For ten points, identify this New England state with capital at Boston.'},
                       {'page': 'Boston', 'text': 'For 10 points, name this city in New England, the capital of Massachusetts.'},
                       {'page': 'Jane_Austen', 'text': 'For 10 points, name this author of Pride and Prejudice.'},
                       {'page': 'Jane_Austen', 'text': 'For 10 points, name this author of Emma and Pride and Prejudice.'},
                       {'page': 'Wolfgang_Amadeus_Mozart', 'text': 'For 10 points, name this composer of Magic Flute and Don Giovanni.'},
                       {'page': 'Wolfgang_Amadeus_Mozart', 'text': 'Name this composer who wrote a famous requiem and The Magic Flute.'},
                       {'page': "Gresham's_law", 'text': 'For 10 points, name this economic principle which states that bad money drives good money out of circulation.'},
                       {'page': "Gresham's_law", 'text': "This is an example -- for 10 points \\-- of what Scotsman's economic law, which states that bad money drives out good?"},
                       {'page': "Gresham's_law", 'text': 'FTP name this economic law which, in simplest terms, states that bad money drives out the good.'},
                       {'page': 'Rhode_Island', 'text': "This colony's Touro Synagogue is the oldest in the United States."},
                       {'page': 'Lima', 'text': 'It is the site of the National University of San Marcos, the oldest university in South America.'},
                       {'page': 'College_of_William_&_Mary', 'text': 'For 10 points, identify this oldest public university in the United States, a college in Virginia named for two monarchs.'}],
             "dev": [{'text': "This capital of England", "top": 'Maine', "second": 'Boston'},
                     {'text': "The author of Pride and Prejudice", "top": 'Jane_Austen',
                      "second": 'Jane_Austen'},
                     {'text': "The composer of the Magic Flute", "top": 'Wolfgang_Amadeus_Mozart',
                      "second": 'Wolfgang_Amadeus_Mozart'},
                     {'text': "The economic law that says 'good money drives out bad'",
                      "top": "Gresham's_law", "second": "Gresham's_law"},
                     {'text': "located outside Boston, the oldest University in the United States",
                      "top": 'College_of_William_&_Mary', "second": 'Rhode_Island'}]
             }
|
|
def normalize_answer(answer):
    """
    Remove superfluous components to create a normalized form of an answer that
    can be more easily compared.

    Normalization: transliterate to ASCII, replace underscores with spaces,
    drop any parenthetical disambiguation suffix, lowercase, strip punctuation
    and surrounding whitespace, and remove leading articles ("the", "a", "an").

    Returns '' when answer is None.
    """
    # BUG FIX: `string` was referenced below but never imported anywhere in the
    # file, raising NameError at runtime. Imported locally, matching the
    # existing local-import style used for unidecode.
    import string
    from unidecode import unidecode

    if answer is None:
        return ''
    reduced = unidecode(answer)
    reduced = reduced.replace("_", " ")
    # Wikipedia-style disambiguation, e.g. "Mercury (planet)" -> "Mercury "
    if "(" in reduced:
        reduced = reduced.split("(")[0]
    reduced = "".join(x for x in reduced.lower() if x not in string.punctuation)
    reduced = reduced.strip()

    # Strip leading articles; prefixes are checked in sequence, so at most one
    # occurrence of each is removed.
    for bad_start in ["the ", "a ", "an "]:
        if reduced.startswith(bad_start):
            reduced = reduced[len(bad_start):]
    return reduced.strip()
|
|
class QuizBowlModel:
    """Ensemble of two FLAN-T5 models that answers quiz-bowl questions.

    Each question is answered by both models; the answer with the higher
    average-token-probability confidence wins.
    """

    def __init__(self):
        # Eagerly loads (and, on first use, downloads) the Hugging Face models.
        self.load_models()

    def load_models(self):
        """Load all models."""
        self.load_flan_models('google/flan-t5-large', 'google/flan-t5-small')

    def load_seq2seq_model(self, model_dir):
        """Load a saved seq2seq model and tokenizer from a local directory."""
        self.test_tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.test_model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
        self.test_model.eval()

    def load_flan_models(self, large_model_id, small_model_id):
        """Load the large and small FLAN-T5 models from the Hugging Face hub."""
        self.tokenizer_flan_t5 = AutoTokenizer.from_pretrained(large_model_id)
        self.model_flan_t5 = AutoModelForSeq2SeqLM.from_pretrained(large_model_id)
        self.tokenizer_t5 = AutoTokenizer.from_pretrained(small_model_id)
        self.model_t5 = AutoModelForSeq2SeqLM.from_pretrained(small_model_id)

    def guess_and_buzz(self, question_texts):
        """Answer each question with the most confident model's guess.

        Args:
            question_texts: iterable of raw question strings.

        Returns:
            A generator of {'guess': str, 'confidence': float} dicts,
            one per question.
        """
        total_answers = self.generate_answers(question_texts)
        return self.ensemble_tfidf_voting(total_answers)

    def generate_answers(self, question_texts):
        """Return, per question, one (text, confidence) pair from each model."""
        return [(self.decode_answer(self.model_flan_t5, self.tokenizer_flan_t5, question),
                 self.decode_answer(self.model_t5, self.tokenizer_t5, question))
                for question in question_texts]

    def decode_answer(self, model, tokenizer, input_text):
        """Generate a short answer plus a confidence score.

        Confidence is exp(mean per-token log-probability) of the generated
        tokens, i.e. the geometric-mean token probability; it is None when the
        model returns no step scores.

        Returns:
            (decoded_text, confidence_score) tuple.
        """
        input_ids = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            # Short answers only (max_new_tokens=5); scores are needed to
            # compute the confidence below.
            outputs = model.generate(**input_ids, max_new_tokens=5,
                                     output_scores=True, return_dict_in_generate=True)

        decoded_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)

        if outputs.scores:
            # One score tensor per generation step; sequences[0] begins with the
            # decoder start token, so the token produced at step i is
            # sequences[0][i + 1].
            log_probs = [torch.nn.functional.log_softmax(score, dim=-1) for score in outputs.scores]
            scores = [log_probs[i][0, outputs.sequences[0][i + 1]].item()
                      for i in range(len(outputs.sequences[0]) - 1)]
            confidence_score = np.exp(np.mean(scores))
        else:
            confidence_score = None

        return decoded_text, confidence_score

    def ensemble_tfidf_voting(self, all_answers):
        """Yield, per question, the (guess, confidence) pair with highest confidence.

        BUG FIX: decode_answer may report a confidence of None (no step
        scores), which previously raised TypeError when max() compared it
        against a float; None now ranks below every real confidence.
        """
        for answers in all_answers:
            highest_confidence_answer = max(
                answers,
                key=lambda pair: pair[1] if pair[1] is not None else float('-inf'))
            yield {'guess': highest_confidence_answer[0],
                   'confidence': highest_confidence_answer[1]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|