Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from collections import defaultdict
|
3 |
import random
|
4 |
-
# tkinter is no longer needed as gradio provides a file uploader
|
5 |
-
# import tkinter as tk
|
6 |
-
# from tkinter import filedialog
|
7 |
import re
|
8 |
import nltk
|
9 |
from nltk.tokenize import word_tokenize
|
@@ -136,24 +133,40 @@ def evaluate_generated_text(generated_text):
|
|
136 |
# Implement evaluation logic (like how many phrases were replaced, etc.)
|
137 |
return ""
|
138 |
|
139 |
-
def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=None):
|
140 |
-
print(
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
146 |
|
147 |
# Initialize blacklist to an empty list if not provided
|
148 |
if blacklist is None:
|
|
|
149 |
blacklist = []
|
150 |
|
|
|
|
|
|
|
|
|
151 |
for i in range(length):
|
152 |
-
print(f'Iteration {i+1}
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
|
154 |
print(f'Increasing context window size to: {context_window_size + 1}')
|
155 |
context_window_size = min(context_window_size + 1, max_context_window_size)
|
156 |
|
|
|
157 |
next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
|
158 |
|
159 |
if whitelist:
|
@@ -168,17 +181,10 @@ def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=N
|
|
168 |
if not next_word_candidates:
|
169 |
break
|
170 |
|
171 |
-
next_word =
|
172 |
-
|
173 |
-
next_word
|
174 |
-
|
175 |
-
next_word_candidates.remove(next_word)
|
176 |
-
if not next_word_candidates:
|
177 |
-
break
|
178 |
-
next_word = None
|
179 |
-
|
180 |
-
if not next_word:
|
181 |
-
break
|
182 |
|
183 |
if next_word.startswith('β') and next_word.endswith('β'):
|
184 |
sentence.append(next_word)
|
@@ -214,7 +220,7 @@ def post_process_generated_text(generated_text):
|
|
214 |
|
215 |
return generated_text
|
216 |
|
217 |
-
def generate_with_gradio(start_word, file):
|
218 |
# Load the corpus from the uploaded file
|
219 |
corpus = import_corpus(file)
|
220 |
|
@@ -228,7 +234,7 @@ def generate_with_gradio(start_word, file):
|
|
228 |
word2vec_model = train_word2vec(corpus)
|
229 |
|
230 |
# Generate the sentence
|
231 |
-
generated_sentence = generate_sentence(language_model, start_word)
|
232 |
|
233 |
# Replace repetitive phrases
|
234 |
replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
|
@@ -238,18 +244,23 @@ def generate_with_gradio(start_word, file):
|
|
238 |
|
239 |
return processed_sentence
|
240 |
|
241 |
-
blacklist = []
|
242 |
-
whitelist = []
|
243 |
-
whitelist_weight = 0.1
|
244 |
-
|
245 |
nltk.download('punkt')
|
246 |
|
247 |
# Create a Gradio interface with file uploader
|
248 |
iface = gr.Interface(
|
249 |
fn=generate_with_gradio,
|
250 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
outputs="text",
|
252 |
-
title="Sentence Generator with
|
253 |
description="Enter a starting word and upload a corpus file to generate a sentence."
|
254 |
)
|
255 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
from collections import defaultdict
|
3 |
import random
|
|
|
|
|
|
|
4 |
import re
|
5 |
import nltk
|
6 |
from nltk.tokenize import word_tokenize
|
|
|
133 |
# Implement evaluation logic (like how many phrases were replaced, etc.)
|
134 |
return ""
|
135 |
|
136 |
+
def generate_sentence(model, start_word, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
|
137 |
+
print('======================================================================')
|
138 |
+
print('========================== GENERATING SENTENCE ======================')
|
139 |
+
print(f'Start word: {start_word}')
|
140 |
+
print(f'Length: {length}')
|
141 |
+
print(f'Context window size: {context_window_size}')
|
142 |
+
print(f'Max context window size: {max_context_window_size}')
|
143 |
+
print(f'Blacklist: {blacklist}')
|
144 |
+
print(f'Whitelist: {whitelist}')
|
145 |
+
print(f'Whitelist weight: {whitelist_weight}')
|
146 |
+
print('======================================================================')
|
147 |
|
148 |
# Initialize blacklist to an empty list if not provided
|
149 |
if blacklist is None:
|
150 |
+
print('Initializing blacklist to empty list')
|
151 |
blacklist = []
|
152 |
|
153 |
+
sentence = [start_word]
|
154 |
+
current_word = start_word
|
155 |
+
repetitive_phrases = set()
|
156 |
+
|
157 |
for i in range(length):
|
158 |
+
print(f'Iteration {i+1}')
|
159 |
+
print(f'Sentence: {sentence}')
|
160 |
+
print(f'Current word: {current_word}')
|
161 |
+
print(f'Context window size: {context_window_size}')
|
162 |
+
print(f'Blacklist: {blacklist}')
|
163 |
+
print(f'Whitelist: {whitelist}')
|
164 |
+
|
165 |
if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
|
166 |
print(f'Increasing context window size to: {context_window_size + 1}')
|
167 |
context_window_size = min(context_window_size + 1, max_context_window_size)
|
168 |
|
169 |
+
print(f'Next word candidates: {model[current_word].keys()}')
|
170 |
next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
|
171 |
|
172 |
if whitelist:
|
|
|
181 |
if not next_word_candidates:
|
182 |
break
|
183 |
|
184 |
+
next_word = random.choice(next_word_candidates)
|
185 |
+
if next_word in blacklist:
|
186 |
+
print(f'Removing {next_word} from blacklist')
|
187 |
+
blacklist.remove(next_word)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
if next_word.startswith('β') and next_word.endswith('β'):
|
190 |
sentence.append(next_word)
|
|
|
220 |
|
221 |
return generated_text
|
222 |
|
223 |
+
def generate_with_gradio(start_word, file, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
|
224 |
# Load the corpus from the uploaded file
|
225 |
corpus = import_corpus(file)
|
226 |
|
|
|
234 |
word2vec_model = train_word2vec(corpus)
|
235 |
|
236 |
# Generate the sentence
|
237 |
+
generated_sentence = generate_sentence(language_model, start_word, length, context_window_size, max_context_window_size, blacklist=blacklist, whitelist=whitelist, whitelist_weight=whitelist_weight)
|
238 |
|
239 |
# Replace repetitive phrases
|
240 |
replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
|
|
|
244 |
|
245 |
return processed_sentence
|
246 |
|
|
|
|
|
|
|
|
|
247 |
# Fetch the tokenizer data used by nltk's word_tokenize (imported at the top
# of the file).
nltk.download('punkt')

# Create a Gradio interface with file uploader.
# The input components are handed to generate_with_gradio positionally, so
# their order must follow that function's parameter order:
# start_word, file, length, context_window_size, max_context_window_size,
# blacklist, whitelist, whitelist_weight.
iface = gr.Interface(
    fn=generate_with_gradio,
    inputs=[
        "text",  # Start Word
        gr.File(label="Upload Corpus"),  # Corpus File
        # precision=0 makes gr.Number deliver an int instead of a float.
        # These three values are forwarded unchanged to generate_sentence,
        # which passes them to range() and uses them as slice bounds; both
        # raise TypeError when given a float.
        gr.Number(label="Length", value=101, precision=0),  # Length
        gr.Number(label="Context Window Size", value=4, precision=0),  # Context Window Size
        gr.Number(label="Max Context Window Size", value=100, precision=0),  # Max Context Window Size
        # NOTE(review): each textbox yields one raw string, while
        # generate_sentence evaluates `word not in blacklist`, which is a
        # substring test on a str. The visible part of generate_with_gradio
        # forwards the value as-is -- confirm it is split on commas into a
        # list somewhere before it reaches generate_sentence.
        gr.Textbox(label="Blacklist (comma-separated)"),  # Blacklist
        gr.Textbox(label="Whitelist (comma-separated)"),  # Whitelist
        gr.Number(label="Whitelist Weight", value=0.1),  # Whitelist Weight (stays a float)
    ],
    outputs="text",
    title="Sentence Generator with Repetivecc",
    description="Enter a starting word and upload a corpus file to generate a sentence.",
)
iface.launch()
|