Spaces:

shwethd
/

DecoderModel124M

Sleeping

App Files Files Community

shwethd committed on Nov 14, 2025

Commit

c037b52

verified ·

1 Parent(s): 227301c

Upload app.py

Browse files

Files changed (1) hide show

app.py +56 -20

app.py CHANGED Viewed

@@ -361,14 +361,32 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
         # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
         generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
         # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
         # Add space before common words that might have been merged
-        common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them']
         for word in common_words:
             # Only add space if it's not already separated and follows a lowercase letter
             pattern = r'([a-z])(' + word + r'\b)'
             generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
         # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
         # Add space after contractions before lowercase words
         contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
@@ -562,9 +580,9 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
                 label="Top-P (Nucleus)",
                 minimum=0.1,
                 maximum=1.0,
-                value=0.9,
                 step=0.05,
-                info="Nucleus sampling - higher = more diverse, lower = more focused (0.9 recommended)"
             )
             repetition_penalty = gr.Slider(
                 label="Repetition Penalty",
@@ -584,26 +602,44 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
                 show_copy_button=True  # Add copy button
             )
-    # Example prompts
-    gr.Markdown("### Example Prompts (Click to try):")
     examples = gr.Examples(
         examples=[
-            ["First Citizen:"],
-            ["ROMEO:"],
-            ["To be or not"],
-            ["HAMLET:"],
-            ["MACBETH:"],
-            ["JULIET:"],
-            ["KING:"],
-            ["LADY MACBETH:"],
-            ["OTHELLO:"],
-            ["What light through yonder"],
-            ["All the world's a stage"],
-            ["Double, double toil and trouble"],
-            ["Friends, Romans, countrymen"],
-            ["A rose by any other name"],
         ],
-        inputs=prompt_input
     )
     generate_btn.click(

         # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
         generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
+        # Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This"
+        # Remove spaces in the middle of common words
+        common_words_fix = ['further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our', 'your']
+        for word in common_words_fix:
+            # Pattern: word split incorrectly (e.g., "furt her", "T his")
+            pattern = r'\b' + word[0] + r'\s+' + word[1:] + r'\b'
+            generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
+            # Also handle reversed (less common)
+            if len(word) > 3:
+                pattern2 = r'\b' + word[:-1] + r'\s+' + word[-1] + r'\b'
+                generated_text = re.sub(pattern2, word, generated_text, flags=re.IGNORECASE)
         # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
         # Add space before common words that might have been merged
+        common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them', 'to', 'of', 'in', 'on', 'at', 'as', 'is', 'it', 'he', 'she', 'we', 'they', 'an', 'a']
         for word in common_words:
             # Only add space if it's not already separated and follows a lowercase letter
             pattern = r'([a-z])(' + word + r'\b)'
             generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
+        # Fix 2c: Fix double words (e.g., "but but" -> "but")
+        generated_text = re.sub(r'\b(\w+)\s+\1\b', r'\1', generated_text, flags=re.IGNORECASE)
+        # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
+        generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
         # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
         # Add space after contractions before lowercase words
         contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
                 label="Top-P (Nucleus)",
                 minimum=0.1,
                 maximum=1.0,
+                value=0.85,
                 step=0.05,
+                info="Nucleus sampling - 0.85-0.9 recommended. Lower (0.3) = too restrictive, Higher (0.95+) = too random"
             )
             repetition_penalty = gr.Slider(
                 label="Repetition Penalty",
                 show_copy_button=True  # Add copy button
             )
+    # Example prompts with suggested parameters
+    gr.Markdown("""
+    ### Example Prompts (Click to try - includes optimal settings)
+    **What to Expect:**
+    - **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
+    - **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
+    - **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
+    - **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
+    **Note:** Each example includes pre-configured optimal parameters. The model may generate:
+    - ✅ Shakespearean-style dialogue with proper speaker names
+    - ✅ Theatrical language and phrasing
+    - ⚠️ Some spacing issues (automatically fixed by post-processing)
+    - ⚠️ Occasional repetition (mitigated by repetition penalty)
+    - ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
+    **Tip:** Try different examples to see how the model adapts to different character styles and contexts!
+    """)
     examples = gr.Examples(
         examples=[
+            # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
+            ["First Citizen:", 100, 0.7, 50, 0.85, 1.1],
+            ["ROMEO:", 100, 0.65, 45, 0.88, 1.15],  # Romantic - slightly lower temp
+            ["To be or not", 80, 0.6, 40, 0.85, 1.2],  # Quote - more focused
+            ["HAMLET:", 100, 0.7, 50, 0.85, 1.1],
+            ["MACBETH:", 100, 0.7, 50, 0.85, 1.1],
+            ["JULIET:", 100, 0.65, 45, 0.88, 1.15],  # Romantic
+            ["KING:", 100, 0.7, 50, 0.85, 1.1],
+            ["LADY MACBETH:", 100, 0.7, 50, 0.85, 1.1],
+            ["OTHELLO:", 100, 0.7, 50, 0.85, 1.1],
+            ["What light through yonder", 100, 0.65, 45, 0.88, 1.15],  # Romantic quote
+            ["All the world's a stage", 100, 0.7, 50, 0.85, 1.1],  # Metaphorical
+            ["Double, double toil and trouble", 80, 0.7, 50, 0.85, 1.15],  # Witches chant
+            ["Friends, Romans, countrymen", 100, 0.7, 50, 0.85, 1.1],  # Speech
+            ["A rose by any other name", 100, 0.65, 45, 0.88, 1.15],  # Romantic quote
         ],
+        inputs=[prompt_input, max_tokens, temperature, top_k, top_p, repetition_penalty]
     )
     generate_btn.click(