import itertools
import re
import spacy
import json
import evaluate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
from unlimiformer import Unlimiformer, UnlimiformerArguments
import torch

from utils import *
from celebbot import CelebBot

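# Models: FLAN-T5 for seq2seq question answering, an MPNet sentence-transformer for sentence embeddings.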
QA_MODEL_ID = "google/flan-t5-xl"
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]

USE_UNLIMIFORMER = True
TOP_K = 16

celeb_data = get_celeb_data("data.json")
# Collect reference answers in celeb_names order so they stay aligned with the
# predictions generated below, which also iterate over celeb_names.
references = [celeb_data[name]["answers"] for name in celeb_names]
references = list(itertools.chain.from_iterable(references))
predictions = []

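# Load the QA model; optionally convert it with Unlimiformer so it can attend over long inputs.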
device = 'cpu'
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID)
if USE_UNLIMIFORMER:
    defaults = UnlimiformerArguments()
    unlimiformer_kwargs = {
        'layer_begin': defaults.layer_begin, 
        'layer_end': defaults.layer_end,
        'unlimiformer_head_num': defaults.unlimiformer_head_num, 
        'exclude_attention': defaults.unlimiformer_exclude, 
        'chunk_overlap': defaults.unlimiformer_chunk_overlap,
        'model_encoder_max_len': defaults.unlimiformer_chunk_size,
        'verbose': defaults.unlimiformer_verbose,
        'tokenizer': QA_tokenizer,
        'unlimiformer_training': defaults.unlimiformer_training,
        'use_datastore': defaults.use_datastore,
        'flat_index': defaults.flat_index,
        'test_datastore': defaults.test_datastore,
        'reconstruct_embeddings': defaults.reconstruct_embeddings,
        'gpu_datastore': defaults.gpu_datastore,
        'gpu_index': defaults.gpu_index
    }
    QA_model = Unlimiformer.convert_model(QA_model, **unlimiformer_kwargs).to(device)
else:
    QA_model = QA_model.to(device)

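# Sentence-embedding model, passed to CelebBot together with the QA model and knowledge sentences.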
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)

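# spaCy pipeline for sentence segmentation of the scraped biographies
# (loaded once and reused for every celebrity).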
spacy_model = spacy.load("en_core_web_lg")

# For each celebrity: fetch the Britannica biography, split it into sentences,
# and answer every evaluation question with CelebBot.
for celeb_name in celeb_names:
    gender = celeb_data[celeb_name]["gender"]
    if celeb_name == "Madonna":
        name = "Madonna-American-singer-and-actress"
    elif celeb_name == "Anne Hathaway":
        name = "Anne-Hathaway-American-actress"
    else:
        name = "-".join(celeb_name.split(" "))
    knowledge = get_article(f"https://www.britannica.com/biography/{name}")
    knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

    ai = CelebBot(celeb_name, gender, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents, top_k=TOP_K)
    for q in celeb_data[celeb_name]["questions"]:
        ai.text = q
        response = ai.question_answer()
        print("response:", response)
        predictions.append(response)

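# Write the generated responses to disk, one per line.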
with open("predictions.txt", "w") as f:
    for prediction in predictions:
        f.write(prediction + "\n")

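# Score predictions against the reference answers with BLEU, METEOR, ROUGE-L, and BERTScore (mean F1).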
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predictions, references=references, max_order=4)
print(f"BLEU: {round(results['bleu'], 2)}")

meteor = evaluate.load("meteor")
results = meteor.compute(predictions=predictions, references=references)
print(f"METEOR: {round(results['meteor'], 2)}")

rouge = evaluate.load("rouge")
results = rouge.compute(predictions=predictions, references=references)
print(f"ROUGE: {round(results['rougeL'], 2)}")

bertscore = evaluate.load("bertscore")
results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")