uripper committed on
Commit
e042aad
1 Parent(s): 763512e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +269 -0
app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+
4
+ BAD_WORD = False
5
+ my_api = st.secrets["my_api"]
6
+ bad_words = st.secrets["bad_words"]
7
+
8
+ def per_generate(text, max_length=500, temperature=0.5, top_k=5, do_sample=False, use_cache=True):
9
+ API_URL = "https://api-inference.huggingface.co/models/uripper/ChatbotTrainingBot"
10
+ headers = {"Authorization": f"Bearer {my_api}"}
11
+
12
+ if do_sample:
13
+ use_cache = False
14
+
15
+ def query(payload):
16
+ response = requests.post(API_URL, headers=headers, json=payload)
17
+ return response.json()
18
+
19
+ output = query({
20
+ "inputs": f"{text}",
21
+ "parameters": {"max_new_tokens": max_length, "temperature": temperature, "top_k": top_k, "do_sample": do_sample},
22
+ "options": {"wait_for_model": True, "use_cache": use_cache},
23
+
24
+ })
25
+ return output
26
+
27
+ def gor_generate(text, max_length=500, temperature=0.5, top_k=5, do_sample=False, use_cache=True):
28
+ API_URL = "https://api-inference.huggingface.co/models/uripper/Gordon"
29
+ headers = {"Authorization": f"Bearer {my_api}"}
30
+
31
+ if do_sample:
32
+ use_cache = False
33
+
34
+ def query(payload):
35
+ response = requests.post(API_URL, headers=headers, json=payload)
36
+ return response.json()
37
+ output = query({
38
+ "inputs": f"{text}",
39
+ "parameters": {"max_new_tokens": max_length, "temperature": temperature, "top_k": top_k, "do_sample": do_sample},
40
+ "options": {"wait_for_model": True, "use_cache": use_cache},
41
+
42
+ })
43
+ return output
44
+
45
+ def rev_generate(text, max_length=500, temperature=0.5, top_k=5, do_sample=False, use_cache=True):
46
+ API_URL = "https://api-inference.huggingface.co/models/uripper/ReviewTrainingBot"
47
+ headers = {"Authorization": f"Bearer {my_api}"}
48
+
49
+ if do_sample:
50
+ use_cache = False
51
+
52
+ def query(payload):
53
+ response = requests.post(API_URL, headers=headers, json=payload)
54
+ return response.json()
55
+
56
+ output = query({
57
+ "inputs": f"{text}",
58
+ "parameters": {"max_new_tokens": max_length, "temperature": temperature, "top_p": .95, "do_sample": do_sample, "no_repeat_ngram_size":2},
59
+ "options": {"wait_for_model": True, "use_cache": use_cache},
60
+ })
61
+ return output
62
+
63
+
64
+ if "persona_chat_history" not in st.session_state:
65
+ st.session_state.persona_chat_history = []
66
+
67
+ if "gordon_chat_history" not in st.session_state:
68
+ st.session_state.gordon_chat_history = []
69
+
70
+
71
+ def main_page():
72
+
73
+ CHAT = False
74
+ REVIEW = False
75
+
76
+
77
+
78
+ st.title("Welcome to this multi function chatbot!")
79
+ st.write("This chatbot has a few interactive features, which can be accessed on the drop down menu on the left. \n\nThe first of these is the Review feature, the main feature of this application. You are able to enter the name of a movie and generate a review for it. This was created by finetuning a GPT-2 model on a dataset of movie reviews. The dataset was created via scraping around 500,000 letterboxd reviews.")
80
+ st.write("The next is the Gordon Chat feature. This is the main recommendation for chatting. This is a finetuned model of DialoGPT from Microsoft, which was trained on the movie lines dataset from Cornell. The corpus can be found at https://convokit.cornell.edu/documentation/movie.html. It is recommended to use greedy search for this model in order to create the most likely responses to text. Its responses are fairly normal, with some abilities to act a specific role in response to a remark. For limitations, read the limitations section below.")
81
+ st.write("The final feature is the Persona Chat feature. This is a finetuned model of distilgpt2 from Hugging Face, which was trained on the truecased Persona Chat dataset, found here https://huggingface.co/datasets/bavard/personachat_truecased. It is recommended to use greedy search for this model as well.")
82
+ st.title("Limitations and biases")
83
+ st.write("The main limitations of the review feature are that it is unable to find links between the movie title and the review itself, and struggles to determine positive and negative sentiment based on the score that is given. It however gives consistently plausible reviews, if not very plausible. It is unable to determine fact, and cannot give truthful reviews or reliably determine actors/directors for any given movie. Its main, and only, use case is for entertainment.")
84
+ st.write("The review bot also has social biases. Due to its underlying model, it has many of the same biases as GPT-2. These biases can be found here: https://huggingface.co/gpt2. In addition to these biases, it also struggles with some of the unique examples of this training dataset. For a concrete example of this, it is fairly common for a review of a movie with gay or lesbian characters to be described as being 'very gay' on letterboxd.com. This is almost always used as a positive thing, but the bot itself is incapable of determining that this is a positive sentiment, and will describe random films this way in a manner that seems more like a slur. This language can likely be extended to other ways that have not been discovered yet, and the model should be handled with care.")
85
+ st.write("Gordon chat's main limitations are that it has difficulty understanding context and reasoning for how it should choose responses. The same can be seen in greedy searches in the model it is trained after, DialoGPT. This is a limitation of chatbots, as a whole, and is not unique to this model. Due to its usage of movie lines as a dataset, prioritization of dialog that may be unnatural or overly dramatic may be expected in some cases, though it does not seem to be very common in practice.")
86
+ st.write("Gordon chat also has social biases. Due to its underlying model, it has many of the same biases as DialoGPT. These biases can be found here: https://www.microsoft.com/en-us/research/project/large-scale-pretraining-for-response-generation/. In addition to these biases, it may have additional biases due to its training dataset. As films often use slurs in a variety of ways for a variety of purposes, these slurs can make their way into the results. In order to solve this, there will be a list of 'bad words' that will prevent output. ")
87
+ st.write("Persona chat's main limitations are much of the same as Gordon chat. The responses have been subjectively rated as weaker than those of Gordon chat, but this may be due to personal preference, and your experience may vary.")
88
+ st.write("Persona chat has many of the same social biases as distilgpt2, which can be found here: https://huggingface.co/distilgpt2. In addition to these biases, it may have additional biases due to its training dataset, although they have not been discovered yet during my testing. As a special precaution, its responses are also passed through a bad word filter, which will prevent output if it contains any of the words in the list.")
89
+
90
+ def review():
91
+ BAD_WORD = False
92
+ st.title("Review")
93
+
94
+ temperature = st.slider("Temperature", 0.1, 1.0, 0.8, 0.01)
95
+ top_k = st.slider("Top K", 1, 100, 15, 1)
96
+ max_length = st.slider("Max Length", 1, 250, 100, 1)
97
+ do_sample = st.checkbox("Do Sample (If unchecked, will use greedy decoding, not recommended for review due to repetition)", True)
98
+
99
+ st.write("Please enter the name of the movie you would like to review. First generation may take up to a minute or more, as the model is loading. Latter generations should load faster.")
100
+ in_movie = st.text_input("Movie")
101
+ review_button = st.button("Generate Review")
102
+ random_review = st.button("Random Review")
103
+ st.write("Please only press Generate Review or Random Review once, it will take a short amount of time to load during the first generation.")
104
+ if review_button:
105
+ in_movie = "Movie: " + in_movie + " Score:"
106
+ output = rev_generate(in_movie, max_length=max_length, temperature=temperature, top_k=top_k, do_sample=do_sample)
107
+
108
+ check_output = output[0]["generated_text"]
109
+ check_output = check_output.split(" ")
110
+ for i in check_output:
111
+ for j in bad_words:
112
+ if i.lower() is j:
113
+ BAD_WORD =True
114
+
115
+
116
+ print(output)
117
+ output = output[0]["generated_text"]
118
+
119
+ if BAD_WORD == True:
120
+
121
+ st.write("The bot generated a slur, please try again.")
122
+ BAD_WORD = False
123
+ else:
124
+ out_movie =output.split("Score:")[0]
125
+ out_movie = out_movie.replace("Movie: ", "")
126
+ score = output.split("Review:")[0]
127
+ score = score.split("Score:")[1]
128
+ review = output.split("Review:")[1]
129
+
130
+ review = review.replace("…", ".")
131
+ review = review.replace("...", ".")
132
+
133
+
134
+ st.write("Movie:")
135
+ st.write(out_movie)
136
+ st.write("Score:")
137
+ st.write(score)
138
+ st.write("Review:")
139
+ st.write(review)
140
+
141
+ if random_review:
142
+ output = rev_generate("Movie:", max_length=max_length, temperature=temperature, top_k=top_k, do_sample=do_sample)
143
+ check_output = output[0]["generated_text"]
144
+ check_output = check_output.split(" ")
145
+ for i in check_output:
146
+ for j in bad_words:
147
+ if i.lower() is j:
148
+ BAD_WORD =True
149
+ print(output)
150
+ output = output[0]["generated_text"]
151
+ if BAD_WORD == True:
152
+ st.write(i)
153
+ st.write("The bot generated a slur, please try again.")
154
+ BAD_WORD = False
155
+ else:
156
+ out_movie =output.split("Score:")[0]
157
+ out_movie = out_movie.replace("Movie: ", "")
158
+ score = output.split("Review:")[0]
159
+ score = score.split("Score:")[1]
160
+ review = output.split("Review:")[1]
161
+
162
+ review = review.replace("…", ".")
163
+ review = review.replace("...", ".")
164
+
165
+
166
+ st.write("Movie:")
167
+ st.write(out_movie)
168
+ st.write("Score:")
169
+ st.write(score)
170
+ st.write("Review:")
171
+ st.write(review)
172
+
173
+
174
+
175
+ def persona():
176
+ BAD_WORD = False
177
+ st.title("Persona Chat")
178
+ st.write("Please enter your message below. First generation may take up to a minute or more, as the model is loading. Latter generations should load faster.")
179
+
180
+ temperature = st.slider("Temperature", 0.1, 1.0, 0.3, 0.1)
181
+ top_k = st.slider("Top K", 1, 100, 5, 1)
182
+ max_length = st.slider("Max Length", 1, 250, 25, 1)
183
+ do_sample = st.checkbox("Do Sample (If unchecked, will use greedy decoding, which is more coherent)")
184
+
185
+
186
+ user_chat = st.text_input("Chat with Persona!")
187
+ stan_chat_button = st.button("Send")
188
+ st.write("Please only press Send once, it will take a short amount of time to load during the first generation.")
189
+
190
+
191
+ if stan_chat_button:
192
+
193
+
194
+ user_chat = "User: " + user_chat
195
+ st.session_state.persona_chat_history.append(user_chat)
196
+ st.write(user_chat)
197
+ user_chat = user_chat + " Bot:"
198
+ output = per_generate(user_chat, max_length=max_length, temperature=temperature, top_k=top_k, do_sample=do_sample)
199
+ check_output = output[0]["generated_text"]
200
+ check_output = check_output.split(" ")
201
+ for i in check_output:
202
+ for j in bad_words:
203
+ if i.lower() is j:
204
+ BAD_WORD =True
205
+ print(output)
206
+
207
+ if BAD_WORD == True:
208
+ st.write("The bot generated a slur, please try again.")
209
+ BAD_WORD = False
210
+ else:
211
+ output = output[0]["generated_text"]
212
+ output = output.split("Bot:")[1]
213
+ output = "Persona: " + output
214
+ st.write(output)
215
+
216
+
217
+
218
+
219
+ def gordon_chat():
220
+ BAD_WORD = False
221
+ st.title("Chat with Gordon")
222
+ st.write("Please enter your message below. First generation may take up to a minute or more, as the model is loading. Latter generations should load faster.")
223
+
224
+ temperature = st.slider("Temperature", 0.1, 1.0, 0.3, 0.1)
225
+ top_k = st.slider("Top K", 1, 100, 5, 1)
226
+ max_length = st.slider("Max Length", 1, 250, 25, 1)
227
+ do_sample = st.checkbox("Do Sample (If unchecked, will use greedy decoding, which is more coherent)")
228
+
229
+ user_chat = st.text_input("Chat with Gordon!")
230
+ gordon_chat_button = st.button("Send")
231
+ st.write("Please only press Send once, it will take a short amount of time to load during the first generation.")
232
+
233
+ if gordon_chat_button:
234
+
235
+
236
+ user_chat = "User: " + user_chat
237
+ st.session_state.gordon_chat_history.append(user_chat)
238
+ st.write(user_chat)
239
+ user_chat = user_chat + " Bot:"
240
+ output = gor_generate(user_chat, max_length=max_length, temperature=temperature, top_k=top_k, do_sample=do_sample)
241
+ check_output = output[0]["generated_text"]
242
+ check_output = check_output.split(" ")
243
+ for i in check_output:
244
+ for j in bad_words:
245
+ if i.lower() is j:
246
+ BAD_WORD =True
247
+ print(output)
248
+
249
+
250
+ if BAD_WORD == True:
251
+ st.write("The bot generated a slur, please try again.")
252
+ BAD_WORD = False
253
+ else:
254
+ output = output[0]["generated_text"]
255
+ output = output.split("Bot:")[1]
256
+ output = "Gordon: " + output
257
+ st.write(output)
258
+
259
+
260
+ page_names_to_funcs = {
261
+ "Main Page": main_page,
262
+ "Review": review,
263
+ "Gordon Chat": gordon_chat,
264
+ "Persona Chat": persona,
265
+ }
266
+
267
+ selected_page = st.sidebar.selectbox("Select a page", page_names_to_funcs.keys())
268
+ page_names_to_funcs[selected_page]()
269
+