EpGuy committed on
Commit
135b971
•
1 Parent(s): c58d836

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -30
app.py CHANGED
@@ -1,9 +1,6 @@
1
  import gradio as gr
2
  from collections import defaultdict
3
  import random
4
- # tkinter is no longer needed as gradio provides a file uploader
5
- # import tkinter as tk
6
- # from tkinter import filedialog
7
  import re
8
  import nltk
9
  from nltk.tokenize import word_tokenize
@@ -136,24 +133,40 @@ def evaluate_generated_text(generated_text):
136
  # Implement evaluation logic (like how many phrases were replaced, etc.)
137
  return ""
138
 
139
- def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=None):
140
- print(f'Generating sentence from: {start_word}')
141
- sentence = [start_word]
142
- current_word = start_word
143
- context_window_size = 4
144
- max_context_window_size = 100
145
- repetitive_phrases = set()
 
 
 
 
146
 
147
  # Initialize blacklist to an empty list if not provided
148
  if blacklist is None:
 
149
  blacklist = []
150
 
 
 
 
 
151
  for i in range(length):
152
- print(f'Iteration {i+1}: {sentence}')
 
 
 
 
 
 
153
  if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
154
  print(f'Increasing context window size to: {context_window_size + 1}')
155
  context_window_size = min(context_window_size + 1, max_context_window_size)
156
 
 
157
  next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
158
 
159
  if whitelist:
@@ -168,17 +181,10 @@ def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=N
168
  if not next_word_candidates:
169
  break
170
 
171
- next_word = None
172
- while not next_word:
173
- next_word = random.choice(next_word_candidates)
174
- if next_word in blacklist:
175
- next_word_candidates.remove(next_word)
176
- if not next_word_candidates:
177
- break
178
- next_word = None
179
-
180
- if not next_word:
181
- break
182
 
183
  if next_word.startswith('“') and next_word.endswith('”'):
184
  sentence.append(next_word)
@@ -214,7 +220,7 @@ def post_process_generated_text(generated_text):
214
 
215
  return generated_text
216
 
217
- def generate_with_gradio(start_word, file):
218
  # Load the corpus from the uploaded file
219
  corpus = import_corpus(file)
220
 
@@ -228,7 +234,7 @@ def generate_with_gradio(start_word, file):
228
  word2vec_model = train_word2vec(corpus)
229
 
230
  # Generate the sentence
231
- generated_sentence = generate_sentence(language_model, start_word)
232
 
233
  # Replace repetitive phrases
234
  replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
@@ -238,18 +244,23 @@ def generate_with_gradio(start_word, file):
238
 
239
  return processed_sentence
240
 
241
- blacklist = []
242
- whitelist = []
243
- whitelist_weight = 0.1
244
-
245
  nltk.download('punkt')
246
 
247
  # Create a Gradio interface with file uploader
248
  iface = gr.Interface(
249
  fn=generate_with_gradio,
250
- inputs=["text", gr.File(label="Upload Corpus")],
 
 
 
 
 
 
 
 
 
251
  outputs="text",
252
- title="Sentence Generator with Repetivec",
253
  description="Enter a starting word and upload a corpus file to generate a sentence."
254
  )
255
  iface.launch()
 
1
  import gradio as gr
2
  from collections import defaultdict
3
  import random
 
 
 
4
  import re
5
  import nltk
6
  from nltk.tokenize import word_tokenize
 
133
  # Implement evaluation logic (like how many phrases were replaced, etc.)
134
  return ""
135
 
136
+ def generate_sentence(model, start_word, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
137
+ print('======================================================================')
138
+ print('========================== GENERATING SENTENCE ======================')
139
+ print(f'Start word: {start_word}')
140
+ print(f'Length: {length}')
141
+ print(f'Context window size: {context_window_size}')
142
+ print(f'Max context window size: {max_context_window_size}')
143
+ print(f'Blacklist: {blacklist}')
144
+ print(f'Whitelist: {whitelist}')
145
+ print(f'Whitelist weight: {whitelist_weight}')
146
+ print('======================================================================')
147
 
148
  # Initialize blacklist to an empty list if not provided
149
  if blacklist is None:
150
+ print('Initializing blacklist to empty list')
151
  blacklist = []
152
 
153
+ sentence = [start_word]
154
+ current_word = start_word
155
+ repetitive_phrases = set()
156
+
157
  for i in range(length):
158
+ print(f'Iteration {i+1}')
159
+ print(f'Sentence: {sentence}')
160
+ print(f'Current word: {current_word}')
161
+ print(f'Context window size: {context_window_size}')
162
+ print(f'Blacklist: {blacklist}')
163
+ print(f'Whitelist: {whitelist}')
164
+
165
  if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
166
  print(f'Increasing context window size to: {context_window_size + 1}')
167
  context_window_size = min(context_window_size + 1, max_context_window_size)
168
 
169
+ print(f'Next word candidates: {model[current_word].keys()}')
170
  next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
171
 
172
  if whitelist:
 
181
  if not next_word_candidates:
182
  break
183
 
184
+ next_word = random.choice(next_word_candidates)
185
+ if next_word in blacklist:
186
+ print(f'Removing {next_word} from blacklist')
187
+ blacklist.remove(next_word)
 
 
 
 
 
 
 
188
 
189
  if next_word.startswith('“') and next_word.endswith('”'):
190
  sentence.append(next_word)
 
220
 
221
  return generated_text
222
 
223
+ def generate_with_gradio(start_word, file, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
224
  # Load the corpus from the uploaded file
225
  corpus = import_corpus(file)
226
 
 
234
  word2vec_model = train_word2vec(corpus)
235
 
236
  # Generate the sentence
237
+ generated_sentence = generate_sentence(language_model, start_word, length, context_window_size, max_context_window_size, blacklist=blacklist, whitelist=whitelist, whitelist_weight=whitelist_weight)
238
 
239
  # Replace repetitive phrases
240
  replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
 
244
 
245
  return processed_sentence
246
 
 
 
 
 
247
  nltk.download('punkt')
248
 
249
  # Create a Gradio interface with file uploader
250
  iface = gr.Interface(
251
  fn=generate_with_gradio,
252
+ inputs=[
253
+ "text", # Start Word
254
+ gr.File(label="Upload Corpus"), # Corpus File
255
+ gr.Number(label="Length", value=101), # Length
256
+ gr.Number(label="Context Window Size", value=4), # Context Window Size
257
+ gr.Number(label="Max Context Window Size", value=100), # Max Context Window Size
258
+ gr.Textbox(label="Blacklist (comma-separated)"), # Blacklist
259
+ gr.Textbox(label="Whitelist (comma-separated)"), # Whitelist
260
+ gr.Number(label="Whitelist Weight", value=0.1) # Whitelist Weight
261
+ ],
262
  outputs="text",
263
+ title="Sentence Generator with Repetivecc",
264
  description="Enter a starting word and upload a corpus file to generate a sentence."
265
  )
266
  iface.launch()