from typing import List, Tuple
import nltk
import sklearn
from .tfidf import TfidfWikiGuesser
import numpy as np
import pandas as pd
from .LogRegBuzzer import LogisticRegressionBuzzer


class QuizBowlModel:
    def __init__(self, use_hf_pkl=False):
        """
        Load your model(s) and whatever else you need in this function.
        Do NOT load your model or resources in the guess_and_buzz() function,
        as it will increase latency severely.
        """
        # best accuracy when using wiki_page_text.json
        self.guesser = TfidfWikiGuesser(wikidump=None, use_hf_pkl=use_hf_pkl)  # can specify a different wikidump if needed
        print("guesser model loaded")

        self.buzzer = LogisticRegressionBuzzer()
        print("buzzer model loaded")
    def guess_and_buzz(self, question_text: List[str]) -> List[Tuple[str, bool]]:
        """
        This function accepts a list of question strings and returns a list of tuples,
        each containing a string representing the guess and a boolean representing
        whether or not to buzz.

        So, guess_and_buzz(["This is a question"]) should return [("answer", False)].

        If you are using a deep learning model, try to use batched prediction instead of
        iterating with a for loop.
        """
        answers = []
        top_guesses = 3  # the guesser returns this many guesses per question (sorted by confidence)

        for question in question_text:
            guesses = self.guesser.make_guess(question, num_guesses=top_guesses)

            # print(f"\n\n\n answered {len(answers)} questions so far \n\n")
            # print(f"left to answer {len(question_text) - len(answers)} questions \n\n")
            # print(f"progress: {(len(answers) / len(question_text)) * 100} \n\n")

            # do the buzzing
            buzz = self.buzzer.predict_buzz(question, guesses[0])

            # make a tuple and add it to the answers list
            tup = (guesses[0], buzz[1])
            print(tup)
            answers.append(tup)

        # might need to format the guesses, e.g., replace "_" with spaces
        return answers
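

# Minimal usage sketch (an assumption, not part of the original module): because this
# file uses relative imports, run it as part of its package, e.g. `python -m <package>.<module>`.
# The sample questions below are hypothetical and only illustrate the documented
# guess_and_buzz() contract: a list of question strings in, a list of (guess, buzz) tuples out.
if __name__ == "__main__":
    model = QuizBowlModel()
    sample_questions = [
        "For 10 points, name this author of Pride and Prejudice.",
        "For 10 points, name this element with atomic number 79.",
    ]
    for guess, should_buzz in model.guess_and_buzz(sample_questions):
        print(f"guess={guess!r}, buzz={should_buzz}")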