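"""Evaluate the CelebBot question-answering pipeline.

Loads questions and reference answers from data.json, rewrites each
celebrity's knowledge text into first person, generates answers with a
FLAN-T5 model conditioned on retrieved knowledge, and reports BLEU,
METEOR, ROUGE-L, and BERTScore F1.
"""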
import itertools
import re
import spacy
import json
import evaluate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
import torch

from utils import *
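# The wildcard utils import is assumed to provide the pronoun patterns
# (he_regex, his_regex, she_regex, her_regex) used further below.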
from celebbot import CelebBot

QA_MODEL_ID = "google/flan-t5-xl"
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
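# Each evaluated celebrity needs an entry in data.json with "gender",
# "knowledge", "questions", and "answers" fields.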

def evaluate_system():
    device = 'cpu'  # inference device; switch to 'cuda' if a GPU is available
    with open("data.json", encoding='utf-8') as json_file:
        celeb_data = json.load(json_file)
    # Gold answers, gathered in the same celebrity order as the generation
    # loop below so references and predictions stay aligned.
    references = [celeb_data[name]['answers'] for name in celeb_names]
    references = list(itertools.chain.from_iterable(references))
    predictions = []

    QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
    QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
    sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
    sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
    # Load spaCy once here rather than re-loading it for every celebrity.
    spacy_model = spacy.load("en_core_web_sm")

    # Generate in-persona answers for each celebrity.
    for name in celeb_names:
        gender = celeb_data[name]["gender"]
        knowledge = celeb_data[name]["knowledge"]

        # Rewrite the knowledge text into first person: the celebrity's full
        # and last names (and their possessive forms) become "I"/"my", and
        # gendered pronouns are replaced using the regexes from utils.
        lname = name.split(" ")[-1]
        lname_regex = re.compile(rf'\b({lname})\b')
        name_regex = re.compile(rf'\b({name})\b')
        lnames = lname + "’s" if not lname.endswith("s") else lname + "’"
        lnames_regex = re.compile(rf'\b({lnames})\b')
        names = name + "’s" if not name.endswith("s") else name + "’"
        names_regex = re.compile(rf'\b({names})\b')
        if gender == "M":
            knowledge = re.sub(he_regex, "I", knowledge)
            knowledge = re.sub(his_regex, "my", knowledge)
        elif gender == "F":
            knowledge = re.sub(she_regex, "I", knowledge)
            knowledge = re.sub(her_regex, "my", knowledge)
        # Substitute possessive forms before plain names so "<Name>'s" maps to "my", not "I's".
        knowledge = re.sub(names_regex, "my", knowledge)
        knowledge = re.sub(lnames_regex, "my", knowledge)
        knowledge = re.sub(name_regex, "I", knowledge)
        knowledge = re.sub(lname_regex, "I", knowledge)
        
        # Split the first-person knowledge into sentences for retrieval.
        knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

        ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
        # If the question text addresses the celebrity directly ("you", "your",
        # or their name), answer in persona and retrieve supporting knowledge
        # assertions; otherwise use only the generic safety instruction.
        if re.search(re.compile(rf'\b(you|your|{ai.name})\b', flags=re.IGNORECASE), ai.text) is not None:
            instruction1 = f"You are a celebrity named {ai.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
            knowledge = ai.retrieve_knowledge_assertions()
        else:
            instruction1 = "Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."

        # Build one prompt per question and generate all answers as a padded batch,
        # so each question yields its own prediction.
        queries = [f"Context: {instruction1} {knowledge}\n\nQuestion: {q}\n\nAnswer:" for q in celeb_data[name]["questions"]]
        inputs = ai.QA_tokenizer(queries, return_tensors="pt", padding=True).to(device)
        outputs = ai.QA_model.generate(**inputs, max_length=1024)
        predictions += ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)

    with open('predictions.txt', 'w', encoding='utf-8') as pred_file:
        for prediction in predictions:
            pred_file.write(prediction + "\n")

    # Corpus-level overlap metrics against the reference answers.
    bleu = evaluate.load("bleu")
    results = bleu.compute(predictions=predictions, references=references, max_order=4)
    print(f"BLEU: {round(results['bleu'], 2)}")

    meteor = evaluate.load("meteor")
    results = meteor.compute(predictions=predictions, references=references)
    print(f"METEOR: {round(results['meteor'], 2)}")

    rouge = evaluate.load("rouge")
    results = rouge.compute(predictions=predictions, references=references)
    print(f"ROUGE-L: {round(results['rougeL'], 2)}")

    # Embedding-based semantic similarity; report the mean F1 across examples.
    bertscore = evaluate.load("bertscore")
    results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
    print(f"BERTScore F1: {round(sum(results['f1']) / len(results['f1']), 2)}")
    
if __name__ == "__main__":
    evaluate_system()