Rainsilves committed
Commit f42affe
1 Parent(s): 05f19e8

added rhopalisms and E-prime setting

Files changed (1): app.py (+19 -3)
app.py CHANGED

@@ -1,6 +1,6 @@
 import re
 from unittest import result
-
+import string
 import streamlit as st
 import torch
 from torch.nn import functional as F
@@ -25,15 +25,23 @@ form.header("Main Settings")
 
 model_name = form.text_area("Enter the name of the pre-trained model from transformers that we are using for Text Generation", value = "gpt2")
 form.caption("This will download a new model, so it may take awhile or even break if the model is too large")
-mode = form.selectbox("What kind of constrained generation are we doing?", ["lipogram", "reverse_lipogram", "length_constrained", "greater_than_length"])
+mode = form.selectbox("What kind of constrained generation are we doing?", ["lipogram", "reverse_lipogram", "e-prime", "rhopalism", "length_constrained", "greater_than_length"])
 form.caption("Lipograms mean that a letter (or substring) is not allowed in the generated string, reverse lipograms force a letter to be in the generated string")
 
 if mode == "lipogram":
     naughty_strings_list = st.text_area("Enter the list of strings that you don't want in each word seperated by a space", value = "E e")
     naughty_strings = naughty_strings_list.split(" ")
+elif mode == "e-prime":
+    e_prime_string = """be being been am is isn't are aren't was wasn't were weren't i'm you're we're they're he's she's it's there's here's where's how's what's who's that's aint isnt arent wasnt werent im youre were theyre hes shes its theres heres wheres hows whats whos thats aint Be Being Been Am Is Isn't Are Aren't Was Wasn't Were Weren't I'm You're We're They're He's She's It's There's Here's Where's How's What's Who's That's Aint Isnt Arent Wasnt Werent Im Youre Were Theyre Hes Shes Its Theres Heres Wheres Hows Whats Whos Thats Aint BE BEING BEEN AM IS ISN'T ARE AREN'T WAS WASN'T WERE WEREN'T I'M YOU'RE WE'RE THEY'RE HE'S SHE'S IT'S THERE'S HERE'S WHERE'S HOW'S WHAT'S WHO'S THAT'S AINT ISNT ARENT WASNT WERENT IM YOURE WERE THEYRE HES SHES ITS THERES HERES WHERES HOWS WHATS WHOS THATS AINT"""
+    st.caption("The default word list is the list needed to enforce the language model to generate english without usage of the verb to be")
+    naughty_strings_list = st.text_area("Enter the list of strings that you don't want to be generated (exact match)", value = e_prime_string)
+    naughty_strings = naughty_strings_list.split(" ")
 elif mode == "reverse_lipogram":
     nice_strings_list = st.text_area("Enter the list of strings that you DO want in each word seperated by a space", value = "t T")
     nice_strings = nice_strings_list.split(" ")
+elif mode == "rhopalism":
+    length_constraint = form.number_input("Enter the length that the Rhopalism shoud start with", value = 1)
+    st.caption("Rhopalisms are usually reliable but sometimes you need to try generating two or three times for a perfect one")
 else:
     length_constraint = form.number_input("Enter the length should each word be restricted to (or greater/less than)", value = 5) + 1
 
@@ -59,6 +67,9 @@ with st.spinner("Please wait while the model loads:"):
 def isPalindrome(s):
     return s == s[::-1]
 
+rhopalism_len = length_constraint
+alphabet_string = string.ascii_lowercase
+
 
 def get_next_word_without_e(input_sequence):
     input_ids = tokenizer.encode(sequence, return_tensors="pt")
@@ -77,7 +88,7 @@ def get_next_word_without_e(input_sequence):
     resulting_string = tokenizer.decode(candidate) #skip_special_tokens=True, clean_up_tokenization_spaces=True)
     ###Constrained text generation starts HERE
     ##Lipogram - No naughty strings used
-    if mode == "lipogram":
+    if mode == "lipogram" or mode == "e-prime":
         if all(nauty_string not in resulting_string for nauty_string in naughty_strings): ## This returns at the first naughty strings
             return resulting_string
     ##Reverse-Lipogram - Must use things in nice_strings
@@ -93,6 +104,10 @@ def get_next_word_without_e(input_sequence):
         ##Only sort of works
         if len(resulting_string) >= length_constraint:
             return resulting_string
+    elif mode == "rhopalism":
+        ##Mostly works
+        if len(resulting_string) == rhopalism_len:
+            return resulting_string
     return " "
 
 
@@ -101,6 +116,7 @@ i = length
 while i > 0:
     new_word = get_next_word_without_e(input_sequence= sequence)
     sequence = sequence + new_word
+    rhopalism_len += 1
     i = i-1
 
 st.write("GENERATED SEQUENCE: ")