Spaces:

ansfarooq7
/

l4-project

Sleeping

App Files Files Community

ansfarooq7 committed on Mar 1, 2022

Commit

651a92a

•

1 Parent(s): e37de85

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -13

app.py CHANGED Viewed

@@ -1,18 +1,19 @@
-from transformers import RobertaTokenizer, RobertaForMaskedLM, GPT2Tokenizer
 import torch
 import wikipedia
 import re
 import random
 import nltk
 import gradio as gr
-from aitextgen import aitextgen
 nltk.download('cmudict')
 roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
 roberta_model = RobertaForMaskedLM.from_pretrained('roberta-base')
 gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-gpt2_model = aitextgen()
 frequent_words = set()
@@ -23,7 +24,7 @@ with open("wordFrequency.txt", 'r') as f:
         line = f.readline()
 def filter_rhymes(word):
-    filter_list = ['to', 'on', 'has', 'but', 'the', 'in', 'and', 'a', 'aitch', 'angst', 'arugula', 'beige', 'blitzed', 'boing', 'bombed', 'cairn', 'chaos', 'chocolate', 'circle', 'circus', 'cleansed', 'coif', 'cusp', 'doth', 'else', 'eth', 'fiends', 'film', 'flange', 'fourths', 'grilse', 'gulf', 'kiln', 'loge', 'midst', 'month', 'music', 'neutron', 'ninja', 'oblige', 'oink', 'opus', 'orange', 'pint', 'plagued', 'plankton', 'plinth', 'poem', 'poet', 'purple', 'quaich', 'rhythm', 'rouged', 'silver', 'siren', 'soldier', 'sylph', 'thesp', 'toilet', 'torsk', 'tufts', 'waltzed', 'wasp', 'wharves', 'width', 'woman', 'yttrium']
     if word in filter_list:
         return False
     else:
@@ -80,15 +81,35 @@ def get_prediction(sent):
         best_guess = best_guess+" "+j[0]
     return best_guess
 def get_line(prompt, inputs_len):
-    line = gpt2_model.generate_one(prompt=prompt + ".", max_length=inputs_len + 7, min_length=4)[len(prompt)+2:]
-    return line
 def get_rhyming_line(prompt, rhyming_word, inputs_len):
-    gpt2_sentence = gpt2_model.generate_one(prompt=prompt + ".", max_length=inputs_len + 4, min_length=2)[len(prompt)+2:]
     while len(gpt2_sentence) == 0:
-        gpt2_sentence = gpt2_model.generate_one(prompt=prompt + ".", max_length=inputs_len + 4, min_length=2)[len(prompt)+2:]
     gpt2_sentence = gpt2_sentence.replace("\n", "")
     print(f"\nGetting rhyming line starting with '{gpt2_sentence}' and ending with rhyming word '{rhyming_word}'")
@@ -108,16 +129,26 @@ def get_rhyming_line(prompt, rhyming_word, inputs_len):
     return final_sentence
 def gpt2_summary(topic):
-    return gpt2_model.generate_one(prompt=f"Here is some information about {topic}.", top_k=50, top_p=0.95, min_length=200)
 def generate(topic, wiki=True):
     if wiki:
         try:
             topic_search = wikipedia.search(topic, results=3)
             topic_summary = remove_punctuation(wikipedia.summary(topic_search[0], auto_suggest=False))
         except wikipedia.DisambiguationError as e:
             page = e.options[0]
-            topic_summary = remove_punctuation(wikipedia.summary(page))
         except:
             return(f"Method A struggled to find information about {topic}, please try a different topic!")
     else:
@@ -205,7 +236,7 @@ def compare_summaries(topic):
     print(output1 + "\n" + output2)
     return output1, output2
 description = "Generates limericks (five-line poems with a rhyme scheme of AABBA) using two different methods, please be patient as it can take up to a minute to generate both limericks."
 article = '<center><big><strong>Limerick Generation</strong></big></center>'\
         '<center><strong>By Ans Farooq</strong></center>'\

+from transformers import RobertaTokenizer, RobertaForMaskedLM, GPT2Tokenizer, GPT2LMHeadModel, pipeline
 import torch
 import wikipedia
 import re
 import random
 import nltk
 import gradio as gr
 nltk.download('cmudict')
 roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
 roberta_model = RobertaForMaskedLM.from_pretrained('roberta-base')
 gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2", pad_token_id=gpt2_tokenizer.eos_token_id)
+gpt2_pipeline = pipeline('text-generation', model=gpt2_model, tokenizer=gpt2_tokenizer)
 frequent_words = set()
         line = f.readline()
 def filter_rhymes(word):
+    filter_list = ['an', 'to', 'on', 'has', 'but', 'the', 'in', 'and', 'a']
     if word in filter_list:
         return False
     else:
         best_guess = best_guess+" "+j[0]
     return best_guess
 def get_line(prompt, inputs_len):
+    output = gpt2_pipeline(
+        prompt + ".",
+        min_length=4,
+        max_length=inputs_len + 7,
+        clean_up_tokenization_spaces=True,
+        return_full_text=False
+    )
+    return remove_punctuation(output[0]['generated_text'])
 def get_rhyming_line(prompt, rhyming_word, inputs_len):
+    output = gpt2_pipeline(
+        prompt + ".",
+        min_length=4,
+        max_length=inputs_len + 3,
+        clean_up_tokenization_spaces=True,
+        return_full_text=False
+    )
+    gpt2_sentence = remove_punctuation(output[0]['generated_text'])
     while len(gpt2_sentence) == 0:
+        output = gpt2_pipeline(
+            prompt + ".",
+            min_length=4,
+            max_length=inputs_len + 3,
+            clean_up_tokenization_spaces=True,
+            return_full_text=False
+        )
+        gpt2_sentence = remove_punctuation(output[0]['generated_text'])
     gpt2_sentence = gpt2_sentence.replace("\n", "")
     print(f"\nGetting rhyming line starting with '{gpt2_sentence}' and ending with rhyming word '{rhyming_word}'")
     return final_sentence
 def gpt2_summary(topic):
+    output = gpt2_pipeline(
+        f"Here is some information about {topic}.",
+        min_length=200,
+        max_length=300,
+        clean_up_tokenization_spaces=True,
+        return_full_text=False
+    )
+    return remove_punctuation(output[0]['generated_text'])
 def generate(topic, wiki=True):
     if wiki:
         try:
             topic_search = wikipedia.search(topic, results=3)
+            print(f"Wikipedia search results for {topic} are: {topic_search}")
             topic_summary = remove_punctuation(wikipedia.summary(topic_search[0], auto_suggest=False))
         except wikipedia.DisambiguationError as e:
+            print("===================== DISAMBIGUATION ERROR =====================")
+            print(f"Wikipedia returned a disambiguation error for {topic}. Selecting the first option {e.options[0]} instead.")
             page = e.options[0]
+            topic_summary = remove_punctuation(wikipedia.summary(page, auto_suggest=False))
         except:
             return(f"Method A struggled to find information about {topic}, please try a different topic!")
     else:
     print(output1 + "\n" + output2)
     return output1, output2
 description = "Generates limericks (five-line poems with a rhyme scheme of AABBA) using two different methods, please be patient as it can take up to a minute to generate both limericks."
 article = '<center><big><strong>Limerick Generation</strong></big></center>'\
         '<center><strong>By Ans Farooq</strong></center>'\