sunwaee committed on
Commit
2f5e722
1 Parent(s): c1af279
Files changed (1) hide show
  1. app.py +182 -0
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gdown as gdown
4
+ import nltk
5
+ import streamlit as st
6
+ import torch
7
+ from transformers import AutoTokenizer
8
+
9
+ from mt5 import MT5
10
+
11
+
12
def download_models(ids):
    """
    Download the sentence tokenizer and every model checkpoint missing locally.

    Checkpoints are stored as ``model/<name>.ckpt``; a checkpoint that is
    already present on disk is not downloaded again.

    :param ids: mapping of model name to its Google Drive file id
    :return: None
    """

    # Download sentence tokenizer
    nltk.download('punkt')

    # Make sure the target folder exists before anything is written into it
    os.makedirs("model", exist_ok=True)

    # Download model from drive if not stored locally
    for key, drive_id in ids.items():
        target = f"model/{key}.ckpt"
        if not os.path.isfile(target):
            url = f"https://drive.google.com/uc?id={drive_id}"
            gdown.download(url=url, output=target)
28
+
29
+
30
@st.cache(allow_output_mutation=True)
def load_model(model_path):
    """
    Build the model from a checkpoint and let Streamlit cache the result.

    The model is switched to evaluation mode and moved to the GPU when CUDA
    is available, otherwise to the CPU. A tokenizer loaded from the local
    'tokenizer' location is attached to it as ``model.tokenizer``.

    :param model_path: path to the checkpoint file
    :return: the loaded model with its tokenizer attached
    """

    # Pick the target device once, up front
    if torch.cuda.is_available():
        target_device = 'cuda'
    else:
        target_device = 'cpu'

    # Restore the weights, then prepare the network for inference
    network = MT5.load_from_checkpoint(model_path)
    network = network.eval()
    network = network.to(target_device)

    # The tokenizer is loaded separately and hung on the model object
    network.tokenizer = AutoTokenizer.from_pretrained('tokenizer')

    return network
46
+
47
+
48
# Page header
st.set_page_config(layout="centered")
st.title("Questions/Answers Gen. (English)")
st.write("Question Generation, Question Answering and Questions/Answers Generation using Google MT5. ")

# Known models: display name -> Google Drive file id.
# The id is blank here; the alternative below reads it from the app secrets:
# ids = {'mt5-small': st.secrets['model_key']}
ids = {'mt5-small': ''}

# Fetching the checkpoints from drive is currently switched off:
# download_models(ids)

# Two columns: the task picker on the left, the model picker on the right
left, right = st.columns([4, 2])

# Task selection
task_names = ['Questions/Answers Generation', 'Question Answering', 'Question Generation']
task = left.selectbox('', options=task_names, help='Choose the task you want to try out')

# Model selection: the chosen name maps to a checkpoint file under model/
model_path = right.selectbox('', options=list(ids), index=0, help='Model to use. ')
model = load_model(model_path=f"model/{model_path}.ckpt")

# Show which device the model ended up on
right.write(model.device)
70
+
71
# Default passage pre-filled in the context box of the two answer-oriented tasks.
DEFAULT_CONTEXT = ("A few years after the First Crusade, in 1107, the Normans under "
                   "the command of Bohemond, Robert's son, landed in Valona and "
                   "besieged Dyrrachium using the most sophisticated military "
                   "equipment of the time, but to no avail. Meanwhile, they occupied "
                   "Petrela, the citadel of Mili at the banks of the river Deabolis, "
                   "Gllavenica (Ballsh), Kanina and Jericho. This time, "
                   "the Albanians sided with the Normans, dissatisfied by the heavy "
                   "taxes the Byzantines had imposed upon them. With their help, "
                   "the Normans secured the Arbanon passes and opened their way to "
                   "Dibra. The lack of supplies, disease and Byzantine resistance "
                   "forced Bohemond to retreat from his campaign and sign a peace "
                   "treaty with the Byzantines in the city of Deabolis. ")

# Same passage with candidate answers wrapped in <hl> markers (question generation).
DEFAULT_HIGHLIGHTED_CONTEXT = ("A few years after the First Crusade, in <hl> 1107 <hl>, the <hl> Normans <hl> under "
                               "the command of <hl> Bohemond <hl>, Robert's son, landed in Valona and "
                               "besieged Dyrrachium using the most sophisticated military "
                               "equipment of the time, but to no avail. Meanwhile, they occupied "
                               "Petrela, <hl> the citadel of Mili <hl> at the banks of the river Deabolis, "
                               "Gllavenica (Ballsh), Kanina and Jericho. This time, "
                               "the Albanians sided with the Normans, dissatisfied by the heavy "
                               "taxes the Byzantines had imposed upon them. With their help, "
                               "the Normans secured the Arbanon passes and opened their way to "
                               "Dibra. The <hl> lack of supplies, disease and Byzantine resistance <hl> "
                               "forced Bohemond to retreat from his campaign and sign a peace "
                               "treaty with the Byzantines in the city of Deabolis. ")

HIGHLIGHT = '<hl>'


def _highlight_contexts(text):
    """
    Turn a text with <hl> ... <hl> pairs into one context per highlighted span.

    Each context is the full text with only one pair of markers left in place;
    the matching answer is the text between that pair, stripped of whitespace.

    :param text: text in which answers are wrapped in <hl> markers
    :return: (contexts, answers, dangling) where dangling is True when the
        number of markers is odd, i.e. the last marker has no partner
    """

    # Positions of every marker in the text
    marks = [i for i in range(len(text)) if text.startswith(HIGHLIGHT, i)]
    contexts = []
    answers = []

    # Walk the markers two at a time; stopping at len - 1 skips a trailing
    # unpaired marker instead of indexing past the end of the list
    for k in range(0, len(marks) - 1, 2):
        start = marks[k]
        end = marks[k + 1] + len(HIGHLIGHT)
        span = text[start:end]
        contexts.append(text[:start].replace(HIGHLIGHT, '') + span + text[end:].replace(HIGHLIGHT, ''))
        answers.append(span.replace(HIGHLIGHT, '').strip())

    return contexts, answers, len(marks) % 2 == 1


if task == 'Questions/Answers Generation':
    # Input area
    inputs = st.text_area('Context:', value=DEFAULT_CONTEXT, max_chars=2048, height=250)
    split = st.checkbox('Split into sentences')

    if split:
        # One prediction per sentence, results grouped by sentence
        res = {}
        for sentence in nltk.sent_tokenize(inputs):
            predictions = model.multitask([sentence], max_length=512)
            questions, answers, answers_bis = (predictions['questions'], predictions['answers'],
                                               predictions['answers_bis'])

            # A single item was sent, so only the first entry of each list is read
            res[sentence] = {
                question: {'answer (extracted)': answer, 'answer (generated)': answer_bis}
                for question, answer, answer_bis in zip(questions[0], answers[0], answers_bis[0])
            }

        # Answer area
        st.write(res)

    else:
        # Prediction on the whole text at once
        predictions = model.multitask([inputs], max_length=512)
        questions, answers, answers_bis = predictions['questions'], predictions['answers'], predictions['answers_bis']

        # Answer area (the builtin zip is used directly, never rebound)
        # NOTE(review): these keys differ from the labels used in the split branch - confirm intended
        content = {
            question: {'answer': answer, 'answer_bis': answer_bis}
            for question, answer, answer_bis in zip(questions[0], answers[0], answers_bis[0])
        }
        st.write(content)

elif task == 'Question Answering':

    # Input area
    inputs = st.text_area('Context:', value=DEFAULT_CONTEXT, max_chars=2048, height=250)
    question = st.text_input('Question:', value="What forced Bohemond to retreat from his campaign? ")

    # Prediction
    predictions = model.qa([{'question': question, 'context': inputs}], max_length=512)
    answer = {question: predictions[0]}

    # Answer area
    st.write(answer)

elif task == 'Question Generation':

    # Input area
    inputs = st.text_area('Context (highlight answers with <hl> tokens): ',
                          value=DEFAULT_HIGHLIGHTED_CONTEXT, max_chars=2048, height=250)

    # Build a context for each highlight pair
    contexts, answers, dangling = _highlight_contexts(inputs)

    if dangling:
        st.warning('Odd number of <hl> tokens: the last one has no closing partner and was ignored.')

    if contexts:
        # Prediction
        predictions = model.qg(contexts, max_length=512)

        # Answer area: generated question -> highlighted answer
        st.write(dict(zip(predictions, answers)))
    else:
        # Nothing highlighted, so there is nothing to send to the model
        st.warning('Wrap at least one answer between two <hl> tokens to generate a question.')