File size: 2,163 Bytes
d0dbf63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from typing import List, Tuple
import nltk
import sklearn
from .tfidf import TfidfWikiGuesser
import numpy as np
import pandas as pd
from .LogRegBuzzer import LogisticRegressionBuzzer


class QuizBowlModel:
    """Answer quiz-bowl questions with a guesser and decide whether to buzz.

    Holds one ``TfidfWikiGuesser`` (produces candidate answers) and one
    ``LogisticRegressionBuzzer`` (decides whether to buzz on a candidate).
    Both are created once in ``__init__`` and reused for every call to
    ``guess_and_buzz``.
    """

    def __init__(self, use_hf_pkl=False):
        """
        Load your model(s) and whatever else you need in this function.

        Do NOT load your model or resources in the guess_and_buzz() function,
        as it will increase latency severely.

        Args:
            use_hf_pkl: Forwarded unchanged to ``TfidfWikiGuesser``.
                NOTE(review): presumably selects a pickled model hosted on
                Hugging Face -- confirm against the guesser implementation.
        """
        # Original author's note: best accuracy when using wiki_page_text.json.
        # A different wikidump can be passed here if needed.
        self.guesser = TfidfWikiGuesser(wikidump=None, use_hf_pkl=use_hf_pkl)
        print("guesser model loaded")

        self.buzzer = LogisticRegressionBuzzer()
        print("buzzer model loaded")

    def guess_and_buzz(self, question_text: List[str]) -> List[Tuple[str, bool]]:
        """
        This function accepts a list of question strings, and returns a list of tuples containing
        strings representing the guess and corresponding booleans representing
        whether or not to buzz.

        So, guess_and_buzz(["This is a question"]) should return [("answer", False)]

        If you are using a deep learning model, try to use batched prediction instead of
        iterating using a for loop.

        Args:
            question_text: The questions to answer, one string per question.

        Returns:
            One ``(guess, buzz)`` tuple per input question, in input order.
            ``guess`` is the first candidate returned by the guesser and
            ``buzz`` is element ``[1]`` of the buzzer's prediction for that
            candidate. When the guesser returns no candidates for a question,
            the tuple is ``("", False)`` so the output stays aligned with the
            input instead of the whole batch failing.
        """
        answers = []
        # Number of candidates requested per question; per the original
        # author's note the guesser returns them sorted by confidence.
        top_guesses = 3

        for question in question_text:
            guesses = self.guesser.make_guess(question, num_guesses=top_guesses)

            # len() rather than truthiness: the guesser's return type is not
            # visible here, and array-like results do not support bool().
            if guesses is None or len(guesses) == 0:
                # No candidate to answer with: never buzz on an empty guess.
                tup = ("", False)
            else:
                best_guess = guesses[0]
                # Only the top candidate is shown to the buzzer.
                buzz = self.buzzer.predict_buzz(question, best_guess)
                tup = (best_guess, buzz[1])

            print(tup)
            answers.append(tup)
            # TODO: the guess might need formatting, e.g. replacing "_" with a space.

        return answers