Spaces:
Running
Running
smhavens
committed on
Commit
•
3835be6
1
Parent(s):
5363e91
Updated printing and word generation
Browse files
app.py
CHANGED
@@ -31,6 +31,10 @@ ROMAN_CONSTANTS = (
|
|
31 |
( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
|
32 |
( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
|
33 |
( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
|
|
|
|
|
|
|
|
|
34 |
)
|
35 |
|
36 |
# answer = "Pizza"
|
@@ -175,9 +179,9 @@ def embeddings(model, sentences):
|
|
175 |
# Load model from HuggingFace Hub
|
176 |
tokenizer = AutoTokenizer.from_pretrained('bert-analogies')
|
177 |
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
|
178 |
-
token_ids = tokenizer.encode(sentences, return_tensors='pt')
|
179 |
-
blank_id = tokenizer.mask_token_id
|
180 |
-
blank_id_idx = torch.where(encoded_input["input_ids"] == blank_id)[1]
|
181 |
|
182 |
encoded_input["input_ids"] = encoded_input["input_ids"].to(device)
|
183 |
encoded_input["attention_mask"] = encoded_input["attention_mask"].to(device)
|
@@ -210,8 +214,12 @@ def embeddings(model, sentences):
|
|
210 |
temp_word = guess['token_str']
|
211 |
if temp_word[0].isalpha() and temp_word not in stops and temp_word not in ROMAN_CONSTANTS:
|
212 |
potential_words.append(guess['token_str'])
|
|
|
|
|
|
|
|
|
213 |
|
214 |
-
return
|
215 |
|
216 |
|
217 |
def random_word():
|
@@ -242,12 +250,14 @@ def generate_prompt(model):
|
|
242 |
global word3
|
243 |
global answer
|
244 |
word1 = random_word()
|
245 |
-
word2 = random_word()
|
|
|
246 |
word3 = random_word()
|
247 |
-
sentence = f"{word1} is to {word2} as {word3} is to [MASK]"
|
248 |
print(sentence)
|
249 |
-
answer = embeddings(model, sentence)
|
250 |
print("ANSWER IS", answer)
|
|
|
251 |
# cosine_scores(model, sentence)
|
252 |
|
253 |
|
@@ -258,27 +268,37 @@ def check_answer(guess:str):
|
|
258 |
global guesses
|
259 |
global answer
|
260 |
global return_guesses
|
|
|
|
|
|
|
|
|
261 |
model = get_model()
|
262 |
output = ""
|
263 |
protected_guess = guess
|
264 |
-
sentence = f"{word1} is to {word2} as [MASK] is to {guess}"
|
265 |
-
|
|
|
266 |
guesses.append(guess)
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
return_guesses.append(return_guess)
|
271 |
for guess in return_guesses:
|
272 |
-
output += (guess + "
|
273 |
-
|
274 |
-
|
|
|
|
|
275 |
|
276 |
-
print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
|
277 |
if protected_guess.lower() == answer.lower():
|
278 |
-
|
|
|
|
|
|
|
279 |
else:
|
280 |
-
|
281 |
-
|
|
|
|
|
282 |
|
283 |
def main():
|
284 |
global word1
|
@@ -298,16 +318,16 @@ def main():
|
|
298 |
print(prompt)
|
299 |
print("TESTING EMBEDDINGS")
|
300 |
with gr.Blocks() as iface:
|
301 |
-
gr.Markdown(prompt)
|
302 |
with gr.Tab("Guess"):
|
303 |
text_input = gr.Textbox()
|
304 |
text_output = gr.Textbox()
|
305 |
text_button = gr.Button("Submit")
|
306 |
with gr.Accordion("Open for previous guesses"):
|
307 |
-
text_guesses = gr.
|
308 |
# with gr.Tab("Testing"):
|
309 |
# gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
|
310 |
-
text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
|
311 |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
312 |
iface.launch()
|
313 |
|
|
|
31 |
( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
|
32 |
( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
|
33 |
( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
|
34 |
+
( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
|
35 |
+
( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
|
36 |
+
( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
|
37 |
+
( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
|
38 |
)
|
39 |
|
40 |
# answer = "Pizza"
|
|
|
179 |
# Load model from HuggingFace Hub
|
180 |
tokenizer = AutoTokenizer.from_pretrained('bert-analogies')
|
181 |
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
|
182 |
+
# token_ids = tokenizer.encode(sentences, return_tensors='pt')
|
183 |
+
# blank_id = tokenizer.mask_token_id
|
184 |
+
# blank_id_idx = torch.where(encoded_input["input_ids"] == blank_id)[1]
|
185 |
|
186 |
encoded_input["input_ids"] = encoded_input["input_ids"].to(device)
|
187 |
encoded_input["attention_mask"] = encoded_input["attention_mask"].to(device)
|
|
|
214 |
temp_word = guess['token_str']
|
215 |
if temp_word[0].isalpha() and temp_word not in stops and temp_word not in ROMAN_CONSTANTS:
|
216 |
potential_words.append(guess['token_str'])
|
217 |
+
|
218 |
+
rand_index = random.randint(0, len(potential_words) - 1)
|
219 |
+
print("THE LENGTH OF POTENTIAL WORDS FOR", sentences, "IS", len(potential_words), "AND THE RANDOM INDEX CHOSEN IS", rand_index)
|
220 |
+
chosen_word = potential_words[rand_index]
|
221 |
|
222 |
+
return chosen_word
|
223 |
|
224 |
|
225 |
def random_word():
|
|
|
250 |
global word3
|
251 |
global answer
|
252 |
word1 = random_word()
|
253 |
+
# word2 = random_word()
|
254 |
+
word2 = embeddings(model, f"{word1} is to [MASK].")
|
255 |
word3 = random_word()
|
256 |
+
sentence = f"{word1} is to {word2} as {word3} is to [MASK]."
|
257 |
print(sentence)
|
258 |
+
answer = embeddings(model, sentence)
|
259 |
print("ANSWER IS", answer)
|
260 |
+
return f"# {word1} is to {word2} as {word3} is to ___."
|
261 |
# cosine_scores(model, sentence)
|
262 |
|
263 |
|
|
|
268 |
global guesses
|
269 |
global answer
|
270 |
global return_guesses
|
271 |
+
global word1
|
272 |
+
global word2
|
273 |
+
global word3
|
274 |
+
|
275 |
model = get_model()
|
276 |
output = ""
|
277 |
protected_guess = guess
|
278 |
+
sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
|
279 |
+
|
280 |
+
other_word = embeddings(model, sentence)
|
281 |
guesses.append(guess)
|
282 |
+
|
283 |
+
|
284 |
+
|
|
|
285 |
for guess in return_guesses:
|
286 |
+
output += ("- " + guess + "<br>")
|
287 |
+
|
288 |
+
# output = output[:-1]
|
289 |
+
prompt = f"{word1} is to {word2} as {word3} is to ___."
|
290 |
+
# print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
|
291 |
|
|
|
292 |
if protected_guess.lower() == answer.lower():
|
293 |
+
return_guesses[len(return_guesses)-1] = f"{word1} is to {word2} as {word3} is to {protected_guess}."
|
294 |
+
output += f"- <span style='color:green'>{return_guesses[-1]}</span>"
|
295 |
+
new_prompt = generate_prompt(model)
|
296 |
+
return new_prompt, "Correct!", output
|
297 |
else:
|
298 |
+
return_guess = f"{guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
|
299 |
+
return_guesses.append(return_guess)
|
300 |
+
output += return_guess
|
301 |
+
return prompt, "Try again!", output
|
302 |
|
303 |
def main():
|
304 |
global word1
|
|
|
318 |
print(prompt)
|
319 |
print("TESTING EMBEDDINGS")
|
320 |
with gr.Blocks() as iface:
|
321 |
+
mark_question = gr.Markdown(prompt)
|
322 |
with gr.Tab("Guess"):
|
323 |
text_input = gr.Textbox()
|
324 |
text_output = gr.Textbox()
|
325 |
text_button = gr.Button("Submit")
|
326 |
with gr.Accordion("Open for previous guesses"):
|
327 |
+
text_guesses = gr.Markdown()
|
328 |
# with gr.Tab("Testing"):
|
329 |
# gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
|
330 |
+
text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
|
331 |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
332 |
iface.launch()
|
333 |
|