File size: 2,611 Bytes
6bc94ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
import re

import spacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel

from celebbot import CelebBot
from utils import *

# When True, main() reads questions from stdin via input() and skips the
# speech_to_text()/text_to_speech() calls on the bot.
DEBUG = False
# Model id passed to AutoTokenizer/AutoModelForSeq2SeqLM.from_pretrained()
# to build the question-answering tokenizer and model.
QA_MODEL_ID = "google/flan-t5-large"
# Model id passed to AutoTokenizer/AutoModel.from_pretrained() to build the
# sentTr_* tokenizer and model handed to CelebBot.
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"

def _prompt_for_celebrity(message, celeb_data):
    """Prompt with ``message`` until the user enters a key of ``celeb_data``.

    Surrounding whitespace is ignored. An unknown name re-prompts instead of
    surfacing later as a ``KeyError`` when the data is looked up.
    """
    while True:
        name = input(message).strip()
        if name in celeb_data:
            return name
        print(f"Unknown celebrity {name!r}. Please enter one of: "
              f"{', '.join(celeb_data)}")


def _to_first_person(name, gender, knowledge):
    """Return ``knowledge`` rewritten so text about ``name`` reads as "I"/"my".

    Args:
        name: Full name of the celebrity; its last space-separated word is
            treated as the last name.
        gender: ``"M"`` or ``"F"`` selects which pronoun patterns are
            replaced; any other value leaves pronouns untouched.
        knowledge: Third-person text to rewrite.

    Returns:
        The rewritten text.
    """
    # he_regex/his_regex/she_regex/her_regex are provided by ``utils``.
    if gender == "M":
        knowledge = re.sub(he_regex, "I", knowledge)
        knowledge = re.sub(his_regex, "my", knowledge)
    elif gender == "F":
        knowledge = re.sub(she_regex, "I", knowledge)
        knowledge = re.sub(her_regex, "my", knowledge)

    last_name = name.split(" ")[-1]
    # Full name before last name, so "Emma Watson" is consumed as one unit
    # rather than leaving a dangling first name behind.
    variants = (name, last_name)

    # Possessives must be rewritten before the bare names, otherwise
    # "Watson’s" would become "I’s". The names are escaped because they are
    # user-supplied text, and the tail uses (?!\w) rather than \b: after a
    # trailing apostrophe (names ending in "s") \b would demand a following
    # word character and never match "Williams’ career".
    for who in variants:
        possessive = rf"(?<!\w){re.escape(who)}[’']s?(?!\w)"
        knowledge = re.sub(possessive, "my", knowledge)
    for who in variants:
        knowledge = re.sub(rf"(?<!\w){re.escape(who)}(?!\w)", "I", knowledge)
    return knowledge


def main():
    """Run the interactive celebrity chatbot session.

    Loads the celebrity data from ``data.json`` and the QA and sentTr
    models, asks which celebrity to impersonate, rewrites that celebrity's
    knowledge text into first person, splits it into sentences with spaCy
    and hands everything to ``CelebBot``. It then loops forever: obtain a
    question (typed when ``DEBUG`` is true, otherwise through
    ``ai.speech_to_text()``), call ``ai.question_answer()`` and, outside
    debug mode, ``ai.text_to_speech()``.
    """
    # Explicit UTF-8: the data contains non-ASCII characters such as the
    # typographic apostrophe, which a locale default encoding may mangle.
    with open("data.json", encoding="utf-8") as json_file:
        celeb_data = json.load(json_file)
    message = (
        "Please choose your favorite celebrity from\n"
        "1. Cate Blanchett\n"
        "2. David Beckham\n"
        "3. Emma Watson\n"
        "4. Lady Gaga\n"
        "5. Madonna\n"
        "6. Mark Zuckerberg\n"
        "input name:\n"
    )
    QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
    QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID)
    sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
    sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)

    name = _prompt_for_celebrity(message, celeb_data)
    profile = celeb_data[name]
    knowledge = _to_first_person(name, profile["gender"], profile["knowledge"])

    spacy_model = spacy.load("en_core_web_sm")
    knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

    ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)

    while True:
        if DEBUG:
            ai.text = input("Your question: ")
        else:
            # Presumably stores the recognized question in ai.text, which is
            # what the check below reads -- confirm in CelebBot.
            ai.speech_to_text()

        if ai.text != "":
            print("me --> ", ai.text)

            # The return value is deliberately not collected: this loop never
            # ends, so accumulating answers would only grow memory.
            ai.question_answer()

            if not DEBUG:
                ai.text_to_speech()
        # Reset so an empty capture on the next turn is not mistaken for a
        # repeat of this question.
        ai.text = ""

# Script entry point: start the chatbot session only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()