smhavens committed on
Commit
3835be6
1 Parent(s): 5363e91

Updated printing and word generation

Browse files
Files changed (1) hide show
  1. app.py +43 -23
app.py CHANGED
@@ -31,6 +31,10 @@ ROMAN_CONSTANTS = (
31
  ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
32
  ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
33
  ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
 
 
 
 
34
  )
35
 
36
  # answer = "Pizza"
@@ -175,9 +179,9 @@ def embeddings(model, sentences):
175
  # Load model from HuggingFace Hub
176
  tokenizer = AutoTokenizer.from_pretrained('bert-analogies')
177
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
178
- token_ids = tokenizer.encode(sentences, return_tensors='pt')
179
- blank_id = tokenizer.mask_token_id
180
- blank_id_idx = torch.where(encoded_input["input_ids"] == blank_id)[1]
181
 
182
  encoded_input["input_ids"] = encoded_input["input_ids"].to(device)
183
  encoded_input["attention_mask"] = encoded_input["attention_mask"].to(device)
@@ -210,8 +214,12 @@ def embeddings(model, sentences):
210
  temp_word = guess['token_str']
211
  if temp_word[0].isalpha() and temp_word not in stops and temp_word not in ROMAN_CONSTANTS:
212
  potential_words.append(guess['token_str'])
 
 
 
 
213
 
214
- return potential_words
215
 
216
 
217
  def random_word():
@@ -242,12 +250,14 @@ def generate_prompt(model):
242
  global word3
243
  global answer
244
  word1 = random_word()
245
- word2 = random_word()
 
246
  word3 = random_word()
247
- sentence = f"{word1} is to {word2} as {word3} is to [MASK]"
248
  print(sentence)
249
- answer = embeddings(model, sentence)[0]
250
  print("ANSWER IS", answer)
 
251
  # cosine_scores(model, sentence)
252
 
253
 
@@ -258,27 +268,37 @@ def check_answer(guess:str):
258
  global guesses
259
  global answer
260
  global return_guesses
 
 
 
 
261
  model = get_model()
262
  output = ""
263
  protected_guess = guess
264
- sentence = f"{word1} is to {word2} as [MASK] is to {guess}"
265
- other_word = embeddings(model, sentence)[0]
 
266
  guesses.append(guess)
267
- print("GUESS IS", guess)
268
- return_guess = f"{guess}: {word1} is to {word2} as {other_word} is to {guess}"
269
- print("GUESS IS", guess)
270
- return_guesses.append(return_guess)
271
  for guess in return_guesses:
272
- output += (guess + "\n")
273
- output = output[:-1]
274
- print("GUESS IS", protected_guess)
 
 
275
 
276
- print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
277
  if protected_guess.lower() == answer.lower():
278
- return "Correct!", output
 
 
 
279
  else:
280
-
281
- return "Try again!", output
 
 
282
 
283
  def main():
284
  global word1
@@ -298,16 +318,16 @@ def main():
298
  print(prompt)
299
  print("TESTING EMBEDDINGS")
300
  with gr.Blocks() as iface:
301
- gr.Markdown(prompt)
302
  with gr.Tab("Guess"):
303
  text_input = gr.Textbox()
304
  text_output = gr.Textbox()
305
  text_button = gr.Button("Submit")
306
  with gr.Accordion("Open for previous guesses"):
307
- text_guesses = gr.Textbox()
308
  # with gr.Tab("Testing"):
309
  # gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
310
- text_button.click(check_answer, inputs=[text_input], outputs=[text_output, text_guesses])
311
  # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
312
  iface.launch()
313
 
 
31
  ( "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" ),
32
  ( "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" ),
33
  ( "", "M", "MM", "MMM", "", "", "-", "", "", "" ),
34
+ ( "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" ),
35
+ ( "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" ),
36
+ ( "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" ),
37
+ ( "", "m", "mm", "mmm", "", "", "-", "", "", "" ),
38
  )
39
 
40
  # answer = "Pizza"
 
179
  # Load model from HuggingFace Hub
180
  tokenizer = AutoTokenizer.from_pretrained('bert-analogies')
181
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
182
+ # token_ids = tokenizer.encode(sentences, return_tensors='pt')
183
+ # blank_id = tokenizer.mask_token_id
184
+ # blank_id_idx = torch.where(encoded_input["input_ids"] == blank_id)[1]
185
 
186
  encoded_input["input_ids"] = encoded_input["input_ids"].to(device)
187
  encoded_input["attention_mask"] = encoded_input["attention_mask"].to(device)
 
214
  temp_word = guess['token_str']
215
  if temp_word[0].isalpha() and temp_word not in stops and temp_word not in ROMAN_CONSTANTS:
216
  potential_words.append(guess['token_str'])
217
+
218
+ rand_index = random.randint(0, len(potential_words) - 1)
219
+ print("THE LENGTH OF POTENTIAL WORDS FOR", sentences, "IS", len(potential_words), "AND THE RANDOM INDEX CHOSEN IS", rand_index)
220
+ chosen_word = potential_words[rand_index]
221
 
222
+ return chosen_word
223
 
224
 
225
  def random_word():
 
250
  global word3
251
  global answer
252
  word1 = random_word()
253
+ # word2 = random_word()
254
+ word2 = embeddings(model, f"{word1} is to [MASK].")
255
  word3 = random_word()
256
+ sentence = f"{word1} is to {word2} as {word3} is to [MASK]."
257
  print(sentence)
258
+ answer = embeddings(model, sentence)
259
  print("ANSWER IS", answer)
260
+ return f"# {word1} is to {word2} as {word3} is to ___."
261
  # cosine_scores(model, sentence)
262
 
263
 
 
268
  global guesses
269
  global answer
270
  global return_guesses
271
+ global word1
272
+ global word2
273
+ global word3
274
+
275
  model = get_model()
276
  output = ""
277
  protected_guess = guess
278
+ sentence = f"{word1} is to {word2} as [MASK] is to {guess}."
279
+
280
+ other_word = embeddings(model, sentence)
281
  guesses.append(guess)
282
+
283
+
284
+
 
285
  for guess in return_guesses:
286
+ output += ("- " + guess + "<br>")
287
+
288
+ # output = output[:-1]
289
+ prompt = f"{word1} is to {word2} as {word3} is to ___."
290
+ # print("IS", protected_guess, "EQUAL TO", answer, ":", protected_guess.lower() == answer.lower())
291
 
 
292
  if protected_guess.lower() == answer.lower():
293
+ return_guesses[len(return_guesses)-1] = f"{word1} is to {word2} as {word3} is to {protected_guess}."
294
+ output += f"- <span style='color:green'>{return_guesses[-1]}</span>"
295
+ new_prompt = generate_prompt(model)
296
+ return new_prompt, "Correct!", output
297
  else:
298
+ return_guess = f"{guess}: {word1} is to {word2} as {other_word} is to {protected_guess}."
299
+ return_guesses.append(return_guess)
300
+ output += return_guess
301
+ return prompt, "Try again!", output
302
 
303
  def main():
304
  global word1
 
318
  print(prompt)
319
  print("TESTING EMBEDDINGS")
320
  with gr.Blocks() as iface:
321
+ mark_question = gr.Markdown(prompt)
322
  with gr.Tab("Guess"):
323
  text_input = gr.Textbox()
324
  text_output = gr.Textbox()
325
  text_button = gr.Button("Submit")
326
  with gr.Accordion("Open for previous guesses"):
327
+ text_guesses = gr.Markdown()
328
  # with gr.Tab("Testing"):
329
  # gr.Markdown(f"""The Embeddings are {sent_embeddings}.""")
330
+ text_button.click(check_answer, inputs=[text_input], outputs=[mark_question, text_output, text_guesses])
331
  # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
332
  iface.launch()
333