Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -361,14 +361,32 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 361 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
|
| 362 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|
| 363 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
# Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
|
| 365 |
# Add space before common words that might have been merged
|
| 366 |
-
common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them']
|
| 367 |
for word in common_words:
|
| 368 |
# Only add a space when the word directly follows a letter and ends at a word boundary (with re.IGNORECASE, [a-z] also matches uppercase letters)
|
| 369 |
pattern = r'([a-z])(' + word + r'\b)'
|
| 370 |
generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
# Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
|
| 373 |
# Add space after contractions before lowercase words
|
| 374 |
contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
|
|
@@ -562,9 +580,9 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
|
|
| 562 |
label="Top-P (Nucleus)",
|
| 563 |
minimum=0.1,
|
| 564 |
maximum=1.0,
|
| 565 |
-
value=0.
|
| 566 |
step=0.05,
|
| 567 |
-
info="Nucleus sampling -
|
| 568 |
)
|
| 569 |
repetition_penalty = gr.Slider(
|
| 570 |
label="Repetition Penalty",
|
|
@@ -584,26 +602,44 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
|
|
| 584 |
show_copy_button=True # Add copy button
|
| 585 |
)
|
| 586 |
|
| 587 |
-
# Example prompts
|
| 588 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
examples = gr.Examples(
|
| 590 |
examples=[
|
| 591 |
-
[
|
| 592 |
-
["
|
| 593 |
-
["
|
| 594 |
-
["
|
| 595 |
-
["
|
| 596 |
-
["
|
| 597 |
-
["
|
| 598 |
-
["
|
| 599 |
-
["
|
| 600 |
-
["
|
| 601 |
-
["
|
| 602 |
-
["
|
| 603 |
-
["
|
| 604 |
-
["
|
|
|
|
| 605 |
],
|
| 606 |
-
inputs=prompt_input
|
| 607 |
)
|
| 608 |
|
| 609 |
generate_btn.click(
|
|
|
|
| 361 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
|
| 362 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|
| 363 |
|
| 364 |
+
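# Fix 1b: Rejoin common words split after their first letter or before their last letter (e.g., "T his" -> "this", "furthe r" -> "further"); the rejoined word is written in lowercase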
|
| 365 |
+
# Remove spaces in the middle of common words
|
| 366 |
+
common_words_fix = ['further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our', 'your']
|
| 367 |
+
for word in common_words_fix:
|
| 368 |
+
# Pattern: first letter split off from the rest of the word (e.g., "f urther", "T his")
|
| 369 |
+
pattern = r'\b' + word[0] + r'\s+' + word[1:] + r'\b'
|
| 370 |
+
generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
|
| 371 |
+
# Also handle the last letter split off from the word (e.g., "furthe r"); only applied to words longer than three letters
|
| 372 |
+
if len(word) > 3:
|
| 373 |
+
pattern2 = r'\b' + word[:-1] + r'\s+' + word[-1] + r'\b'
|
| 374 |
+
generated_text = re.sub(pattern2, word, generated_text, flags=re.IGNORECASE)
|
| 375 |
+
|
| 376 |
# Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
|
| 377 |
# Add space before common words that might have been merged
|
| 378 |
+
common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them', 'to', 'of', 'in', 'on', 'at', 'as', 'is', 'it', 'he', 'she', 'we', 'they', 'an', 'a']
|
| 379 |
for word in common_words:
|
| 380 |
# Only add a space when the word directly follows a letter and ends at a word boundary (with re.IGNORECASE, [a-z] also matches uppercase letters)
|
| 381 |
pattern = r'([a-z])(' + word + r'\b)'
|
| 382 |
generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
|
| 383 |
|
| 384 |
+
# Fix 2c: Fix double words (e.g., "but but" -> "but")
|
| 385 |
+
generated_text = re.sub(r'\b(\w+)\s+\1\b', r'\1', generated_text, flags=re.IGNORECASE)
|
| 386 |
+
|
| 387 |
+
# Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
|
| 388 |
+
generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
|
| 389 |
+
|
| 390 |
# Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
|
| 391 |
# Add space after contractions before lowercase words
|
| 392 |
contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
|
|
|
|
| 580 |
label="Top-P (Nucleus)",
|
| 581 |
minimum=0.1,
|
| 582 |
maximum=1.0,
|
| 583 |
+
value=0.85,
|
| 584 |
step=0.05,
|
| 585 |
+
info="Nucleus sampling - 0.85-0.9 recommended. Lower (0.3) = too restrictive, Higher (0.95+) = too random"
|
| 586 |
)
|
| 587 |
repetition_penalty = gr.Slider(
|
| 588 |
label="Repetition Penalty",
|
|
|
|
| 602 |
show_copy_button=True # Add copy button
|
| 603 |
)
|
| 604 |
|
| 605 |
+
# Example prompts with suggested parameters
|
| 606 |
+
gr.Markdown("""
|
| 607 |
+
### Example Prompts (Click to try - includes optimal settings)
|
| 608 |
+
|
| 609 |
+
**What to Expect:**
|
| 610 |
+
- **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
|
| 611 |
+
- **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
|
| 612 |
+
- **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
|
| 613 |
+
- **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
|
| 614 |
+
|
| 615 |
+
**Note:** Each example includes pre-configured optimal parameters. The model may generate:
|
| 616 |
+
- ✅ Shakespearean-style dialogue with proper speaker names
|
| 617 |
+
- ✅ Theatrical language and phrasing
|
| 618 |
+
- ⚠️ Some spacing issues (automatically fixed by post-processing)
|
| 619 |
+
- ⚠️ Occasional repetition (mitigated by repetition penalty)
|
| 620 |
+
- ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
|
| 621 |
+
|
| 622 |
+
**Tip:** Try different examples to see how the model adapts to different character styles and contexts!
|
| 623 |
+
""")
|
| 624 |
examples = gr.Examples(
|
| 625 |
examples=[
|
| 626 |
+
# Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
|
| 627 |
+
["First Citizen:", 100, 0.7, 50, 0.85, 1.1],
|
| 628 |
+
["ROMEO:", 100, 0.65, 45, 0.88, 1.15], # Romantic - slightly lower temp
|
| 629 |
+
["To be or not", 80, 0.6, 40, 0.85, 1.2], # Quote - more focused
|
| 630 |
+
["HAMLET:", 100, 0.7, 50, 0.85, 1.1],
|
| 631 |
+
["MACBETH:", 100, 0.7, 50, 0.85, 1.1],
|
| 632 |
+
["JULIET:", 100, 0.65, 45, 0.88, 1.15], # Romantic
|
| 633 |
+
["KING:", 100, 0.7, 50, 0.85, 1.1],
|
| 634 |
+
["LADY MACBETH:", 100, 0.7, 50, 0.85, 1.1],
|
| 635 |
+
["OTHELLO:", 100, 0.7, 50, 0.85, 1.1],
|
| 636 |
+
["What light through yonder", 100, 0.65, 45, 0.88, 1.15], # Romantic quote
|
| 637 |
+
["All the world's a stage", 100, 0.7, 50, 0.85, 1.1], # Metaphorical
|
| 638 |
+
["Double, double toil and trouble", 80, 0.7, 50, 0.85, 1.15], # Witches chant
|
| 639 |
+
["Friends, Romans, countrymen", 100, 0.7, 50, 0.85, 1.1], # Speech
|
| 640 |
+
["A rose by any other name", 100, 0.65, 45, 0.88, 1.15], # Romantic quote
|
| 641 |
],
|
| 642 |
+
inputs=[prompt_input, max_tokens, temperature, top_k, top_p, repetition_penalty]
|
| 643 |
)
|
| 644 |
|
| 645 |
generate_btn.click(
|