Spaces:

EpGuy
/

Repetivec

Sleeping

App Files Community

EpGuy committed on Apr 5

Commit

135b971

•

1 Parent(s): c58d836

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -30

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 import gradio as gr
 from collections import defaultdict
 import random
-# tkinter is no longer needed as gradio provides a file uploader
-# import tkinter as tk
-# from tkinter import filedialog
 import re
 import nltk
 from nltk.tokenize import word_tokenize
@@ -136,24 +133,40 @@ def evaluate_generated_text(generated_text):
     # Implement evaluation logic (like how many phrases were replaced, etc.)
     return ""
-def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=None):
-    print(f'Generating sentence from: {start_word}')
-    sentence = [start_word]
-    current_word = start_word
-    context_window_size = 4
-    max_context_window_size = 100
-    repetitive_phrases = set()
     # Initialize blacklist to an empty list if not provided
     if blacklist is None:
         blacklist = []
     for i in range(length):
-        print(f'Iteration {i+1}: {sentence}')
         if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
             print(f'Increasing context window size to: {context_window_size + 1}')
             context_window_size = min(context_window_size + 1, max_context_window_size)
         next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
         if whitelist:
@@ -168,17 +181,10 @@ def generate_sentence(model, start_word, length=101, blacklist=None, whitelist=N
         if not next_word_candidates:
             break
-        next_word = None
-        while not next_word:
-            next_word = random.choice(next_word_candidates)
-            if next_word in blacklist:
-                next_word_candidates.remove(next_word)
-                if not next_word_candidates:
-                    break
-                next_word = None
-        if not next_word:
-            break
         if next_word.startswith('“') and next_word.endswith('”'):
             sentence.append(next_word)
@@ -214,7 +220,7 @@ def post_process_generated_text(generated_text):
     return generated_text
-def generate_with_gradio(start_word, file):
     # Load the corpus from the uploaded file
     corpus = import_corpus(file)
@@ -228,7 +234,7 @@ def generate_with_gradio(start_word, file):
     word2vec_model = train_word2vec(corpus)
     # Generate the sentence
-    generated_sentence = generate_sentence(language_model, start_word)
     # Replace repetitive phrases
     replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
@@ -238,18 +244,23 @@ def generate_with_gradio(start_word, file):
     return processed_sentence
-blacklist = []
-whitelist = []
-whitelist_weight = 0.1
 nltk.download('punkt')
 # Create a Gradio interface with file uploader
 iface = gr.Interface(
     fn=generate_with_gradio,
-    inputs=["text", gr.File(label="Upload Corpus")],
     outputs="text",
-    title="Sentence Generator with Repetivec",
     description="Enter a starting word and upload a corpus file to generate a sentence."
 )
 iface.launch()

 import gradio as gr
 from collections import defaultdict
 import random
 import re
 import nltk
 from nltk.tokenize import word_tokenize
     # Implement evaluation logic (like how many phrases were replaced, etc.)
     return ""
+def generate_sentence(model, start_word, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
+    print('======================================================================')
+    print('========================== GENERATING SENTENCE ======================')
+    print(f'Start word: {start_word}')
+    print(f'Length: {length}')
+    print(f'Context window size: {context_window_size}')
+    print(f'Max context window size: {max_context_window_size}')
+    print(f'Blacklist: {blacklist}')
+    print(f'Whitelist: {whitelist}')
+    print(f'Whitelist weight: {whitelist_weight}')
+    print('======================================================================')
     # Initialize blacklist to an empty list if not provided
     if blacklist is None:
+        print('Initializing blacklist to empty list')
         blacklist = []
+    sentence = [start_word]
+    current_word = start_word
+    repetitive_phrases = set()
     for i in range(length):
+        print(f'Iteration {i+1}')
+        print(f'Sentence: {sentence}')
+        print(f'Current word: {current_word}')
+        print(f'Context window size: {context_window_size}')
+        print(f'Blacklist: {blacklist}')
+        print(f'Whitelist: {whitelist}')
         if len(sentence) >= context_window_size and tuple(sentence[-context_window_size:]) in repetitive_phrases:
             print(f'Increasing context window size to: {context_window_size + 1}')
             context_window_size = min(context_window_size + 1, max_context_window_size)
+        print(f'Next word candidates: {model[current_word].keys()}')
         next_word_candidates = [word for word in model[current_word].keys() if word not in blacklist]
         if whitelist:
         if not next_word_candidates:
             break
+        next_word = random.choice(next_word_candidates)
+        if next_word in blacklist:
+            print(f'Removing {next_word} from blacklist')
+            blacklist.remove(next_word)
         if next_word.startswith('“') and next_word.endswith('”'):
             sentence.append(next_word)
     return generated_text
+def generate_with_gradio(start_word, file, length=101, context_window_size=4, max_context_window_size=100, blacklist=None, whitelist=None, whitelist_weight=0.1):
     # Load the corpus from the uploaded file
     corpus = import_corpus(file)
     word2vec_model = train_word2vec(corpus)
     # Generate the sentence
+    generated_sentence = generate_sentence(language_model, start_word, length, context_window_size, max_context_window_size, blacklist=blacklist, whitelist=whitelist, whitelist_weight=whitelist_weight)
     # Replace repetitive phrases
     replaced_sentence = replace_repetitive_phrases(generated_sentence, word2vec_model)
     return processed_sentence
 nltk.download('punkt')
 # Create a Gradio interface with file uploader
 iface = gr.Interface(
     fn=generate_with_gradio,
+    inputs=[
+        "text",  # Start Word
+        gr.File(label="Upload Corpus"),  # Corpus File
+        gr.Number(label="Length", value=101),  # Length
+        gr.Number(label="Context Window Size", value=4),  # Context Window Size
+        gr.Number(label="Max Context Window Size", value=100),  # Max Context Window Size
+        gr.Textbox(label="Blacklist (comma-separated)"),  # Blacklist
+        gr.Textbox(label="Whitelist (comma-separated)"),  # Whitelist
+        gr.Number(label="Whitelist Weight", value=0.1)  # Whitelist Weight
+    ],
     outputs="text",
+    title="Sentence Generator with Repetivecc",
     description="Enter a starting word and upload a corpus file to generate a sentence."
 )
 iface.launch()