shwethd committed on
Commit
c037b52
·
verified ·
1 Parent(s): 227301c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -20
app.py CHANGED
@@ -361,14 +361,32 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
361
  # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
362
  generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
363
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
365
  # Add space before common words that might have been merged
366
- common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them']
367
  for word in common_words:
368
  # Only add space if it's not already separated and follows a lowercase letter
369
  pattern = r'([a-z])(' + word + r'\b)'
370
  generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
371
 
 
 
 
 
 
 
372
  # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
373
  # Add space after contractions before lowercase words
374
  contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
@@ -562,9 +580,9 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
562
  label="Top-P (Nucleus)",
563
  minimum=0.1,
564
  maximum=1.0,
565
- value=0.9,
566
  step=0.05,
567
- info="Nucleus sampling - higher = more diverse, lower = more focused (0.9 recommended)"
568
  )
569
  repetition_penalty = gr.Slider(
570
  label="Repetition Penalty",
@@ -584,26 +602,44 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
584
  show_copy_button=True # Add copy button
585
  )
586
 
587
- # Example prompts
588
- gr.Markdown("### Example Prompts (Click to try):")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
  examples = gr.Examples(
590
  examples=[
591
- ["First Citizen:"],
592
- ["ROMEO:"],
593
- ["To be or not"],
594
- ["HAMLET:"],
595
- ["MACBETH:"],
596
- ["JULIET:"],
597
- ["KING:"],
598
- ["LADY MACBETH:"],
599
- ["OTHELLO:"],
600
- ["What light through yonder"],
601
- ["All the world's a stage"],
602
- ["Double, double toil and trouble"],
603
- ["Friends, Romans, countrymen"],
604
- ["A rose by any other name"],
 
605
  ],
606
- inputs=prompt_input
607
  )
608
 
609
  generate_btn.click(
 
361
  # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
362
  generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
363
 
364
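+ # Fix 1b: Rejoin common words split after the first letter, e.g. "T his" -> "this" (NOTE: the replacement is the lowercase list entry, and mid-word splits like "furt her" are not matched by the patterns below)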
365
+ # Remove spaces in the middle of common words
366
+ common_words_fix = ['further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our', 'your']
367
+ for word in common_words_fix:
368
+ # Pattern: first letter separated from the rest of the word (e.g., "T his", "f urther")
369
+ pattern = r'\b' + word[0] + r'\s+' + word[1:] + r'\b'
370
+ generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
371
+ # Also handle a split just before the last letter (e.g., "thei r" -> "their"); less common
372
+ if len(word) > 3:
373
+ pattern2 = r'\b' + word[:-1] + r'\s+' + word[-1] + r'\b'
374
+ generated_text = re.sub(pattern2, word, generated_text, flags=re.IGNORECASE)
375
+
376
  # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
377
  # Add space before common words that might have been merged
378
+ common_words = ['with', 'the', 'and', 'that', 'this', 'have', 'from', 'not', 'but', 'for', 'are', 'was', 'were', 'been', 'will', 'shall', 'would', 'could', 'should', 'be', 'your', 'you', 'our', 'my', 'his', 'her', 'their', 'him', 'them', 'to', 'of', 'in', 'on', 'at', 'as', 'is', 'it', 'he', 'she', 'we', 'they', 'an', 'a']
379
  for word in common_words:
380
  # Only add space if it's not already separated and follows a lowercase letter
381
  pattern = r'([a-z])(' + word + r'\b)'
382
  generated_text = re.sub(pattern, r'\1 \2', generated_text, flags=re.IGNORECASE)
383
 
384
+ # Fix 2c: Fix double words (e.g., "but but" -> "but")
385
+ generated_text = re.sub(r'\b(\w+)\s+\1\b', r'\1', generated_text, flags=re.IGNORECASE)
386
+
387
+ # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
388
+ generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
389
+
390
  # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
391
  # Add space after contractions before lowercase words
392
  contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
 
580
  label="Top-P (Nucleus)",
581
  minimum=0.1,
582
  maximum=1.0,
583
+ value=0.85,
584
  step=0.05,
585
+ info="Nucleus sampling - 0.85-0.9 recommended. Lower (0.3) = too restrictive, Higher (0.95+) = too random"
586
  )
587
  repetition_penalty = gr.Slider(
588
  label="Repetition Penalty",
 
602
  show_copy_button=True # Add copy button
603
  )
604
 
605
+ # Example prompts with suggested parameters
606
+ gr.Markdown("""
607
+ ### Example Prompts (Click to try - includes optimal settings)
608
+
609
+ **What to Expect:**
610
+ - **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
611
+ - **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
612
+ - **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
613
+ - **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
614
+
615
+ **Note:** Each example includes pre-configured optimal parameters. The model may generate:
616
+ - ✅ Shakespearean-style dialogue with proper speaker names
617
+ - ✅ Theatrical language and phrasing
618
+ - ⚠️ Some spacing issues (automatically fixed by post-processing)
619
+ - ⚠️ Occasional repetition (mitigated by repetition penalty)
620
+ - ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
621
+
622
+ **Tip:** Try different examples to see how the model adapts to different character styles and contexts!
623
+ """)
624
  examples = gr.Examples(
625
  examples=[
626
+ # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
627
+ ["First Citizen:", 100, 0.7, 50, 0.85, 1.1],
628
+ ["ROMEO:", 100, 0.65, 45, 0.88, 1.15], # Romantic - slightly lower temp
629
+ ["To be or not", 80, 0.6, 40, 0.85, 1.2], # Quote - more focused
630
+ ["HAMLET:", 100, 0.7, 50, 0.85, 1.1],
631
+ ["MACBETH:", 100, 0.7, 50, 0.85, 1.1],
632
+ ["JULIET:", 100, 0.65, 45, 0.88, 1.15], # Romantic
633
+ ["KING:", 100, 0.7, 50, 0.85, 1.1],
634
+ ["LADY MACBETH:", 100, 0.7, 50, 0.85, 1.1],
635
+ ["OTHELLO:", 100, 0.7, 50, 0.85, 1.1],
636
+ ["What light through yonder", 100, 0.65, 45, 0.88, 1.15], # Romantic quote
637
+ ["All the world's a stage", 100, 0.7, 50, 0.85, 1.1], # Metaphorical
638
+ ["Double, double toil and trouble", 80, 0.7, 50, 0.85, 1.15], # Witches chant
639
+ ["Friends, Romans, countrymen", 100, 0.7, 50, 0.85, 1.1], # Speech
640
+ ["A rose by any other name", 100, 0.65, 45, 0.88, 1.15], # Romantic quote
641
  ],
642
+ inputs=[prompt_input, max_tokens, temperature, top_k, top_p, repetition_penalty]
643
  )
644
 
645
  generate_btn.click(