Christian Koch committed on
Commit
cd3659c
1 Parent(s): 479b050

further improvements, implement question generator

Browse files
Files changed (2) hide show
  1. app.py +30 -76
  2. question_gen.py +26 -0
app.py CHANGED
@@ -1,84 +1,52 @@
1
  import streamlit as st
2
- from transformers import pipeline, PegasusForConditionalGeneration, PegasusTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
3
- import nltk
4
 
5
  from fill_in_summary import FillInSummary
6
  from paraphrase import PegasusParaphraser
7
- import question_generator as q
8
 
9
- nltk.download('punkt')
10
 
11
- tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
 
 
 
 
 
 
 
 
 
 
12
 
13
- model = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-small")
14
 
15
 
16
  st.set_page_config(layout="centered")
17
  st.title('Question Generator by Eddevs')
 
18
 
19
- select = st.selectbox('Type', ['Question Generator', 'Paraphrasing', 'Summarization', 'Fill in the blank'])
20
-
21
 
22
  if select == "Question Generator":
23
  with st.form("question_gen"):
24
- # left_column, right_column = st.columns(2)
25
- # left_column.selectbox('Type', ['Question Generator', 'Paraphrasing'])
26
- #st.selectbox('Model', ['T5', 'GPT Neo-X'])
27
-
28
- text_input = st.text_area("Input Text")
29
 
30
  submitted = st.form_submit_button("Generate")
 
 
 
 
31
 
32
- split = st.checkbox('Split into sentences', value=True)
33
-
34
- if split:
35
- # Split into sentences
36
- sent_tokenized = nltk.sent_tokenize(text_input)
37
- res = {}
38
-
39
- with st.spinner('Please wait while the inputs are being processed...'):
40
- # Iterate over sentences
41
- for sentence in sent_tokenized:
42
- predictions = model.multitask([sentence], max_length=512)
43
- questions, answers, answers_bis = predictions['questions'], predictions['answers'], predictions[
44
- 'answers_bis']
45
-
46
- # Build answer dict
47
- content = {}
48
- for question, answer, answer_bis in zip(questions[0], answers[0], answers_bis[0]):
49
- content[question] = {'answer (extracted)': answer, 'answer (generated)': answer_bis}
50
- res[sentence] = content
51
-
52
- # Answer area
53
- st.write(res)
54
-
55
- else:
56
- with st.spinner('Please wait while the inputs are being processed...'):
57
- # Prediction
58
- predictions = model.multitask([text_input], max_length=512)
59
- questions, answers, answers_bis = predictions['questions'], predictions['answers'], predictions[
60
- 'answers_bis']
61
-
62
- # Answer area
63
- zip = zip(questions[0], answers[0], answers_bis[0])
64
- content = {}
65
- for question, answer, answer_bis in zip:
66
- content[question] = {'answer (extracted)': answer, 'answer (generated)': answer_bis}
67
-
68
- st.write(content)
69
- if submitted:
70
- with st.spinner('Wait for it...'):
71
- result = FillInSummary().summarize(text_input)
72
- st.write(text_input)
73
 
74
 
75
  elif select == "Summarization":
76
  with st.form("summarization"):
77
- # left_column, right_column = st.columns(2)
78
- # left_column.selectbox('Type', ['Question Generator', 'Paraphrasing'])
79
- #st.selectbox('Model', ['T5', 'GPT Neo-X'])
80
-
81
- text_input = st.text_area("Input Text")
82
 
83
  submitted = st.form_submit_button("Generate")
84
 
@@ -90,7 +58,7 @@ elif select == "Summarization":
90
 
91
  elif select == "Fill in the blank":
92
  with st.form("fill_in_the_blank"):
93
- text_input = st.text_area("Input Text")
94
 
95
  submitted = st.form_submit_button("Generate")
96
 
@@ -104,29 +72,15 @@ elif select == "Fill in the blank":
104
 
105
  elif select == "Paraphrasing":
106
  with st.form("paraphrasing"):
107
- # st.selectbox('Model', ['T5', 'GPT Neo-X'])
108
  left_column, right_column = st.columns(2)
109
  count = left_column.slider('Count', 0, 10, 3)
110
  temperature = right_column.slider('Temperature', 0.0, 10.0, 1.5)
111
- text_input = st.text_area("Input Text")
112
 
113
  submitted = st.form_submit_button("Generate")
114
 
115
  if submitted:
116
  with st.spinner('Wait for it...'):
117
- paraphrase_model = PegasusParaphraser(num_return_sequences=count,temperature=temperature)
118
  result = paraphrase_model.paraphrase(text_input)
119
  st.write(result)
120
-
121
-
122
-
123
-
124
-
125
- #if st.button('Generate'):
126
- # st.write(input)
127
- #st.success("We have generated 105 Questions for you")
128
- # st.snow()
129
- ##else:
130
- ##nothing here
131
-
132
-
 
1
  import streamlit as st
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
 
3
 
4
  from fill_in_summary import FillInSummary
5
  from paraphrase import PegasusParaphraser
6
+ import question_gen as q
7
 
 
8
 
9
+ default_text = "Apple was founded as Apple Computer Company on April 1, 1976, by Steve Jobs, Steve Wozniak and Ronald " \
10
+ "Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and " \
11
+ "Wozniak as Apple Computer, Inc. in 1977 and the company's next computer, the Apple II became a best " \
12
+ "seller. Apple went public in 1980, to instant financial success. The company went onto develop new " \
13
+ "computers featuring innovative graphical user interfaces, including the original Macintosh, " \
14
+ "announced in a critically acclaimed advertisement, '1984', directed by Ridley Scott. By 1985, " \
15
+ "the high cost of its products and power struggles between executives caused problems. Wozniak stepped " \
16
+ "back from Apple amicably, while Jobs resigned to found NeXT, taking some Apple employees with him. "
17
+
18
+ default_text2 = "The board of directors instructed Sculley to contain Jobs and his ability to launch expensive forays " \
19
+ "into untested products "
20
 
 
21
 
22
 
23
  st.set_page_config(layout="centered")
24
  st.title('Question Generator by Eddevs')
25
+ st.write('Please select the task you want to do.')
26
 
27
+ select = st.selectbox('Type', ['Question Generator', 'Paraphrasing', 'Summarization', 'Fill in the blank'])
 
28
 
29
  if select == "Question Generator":
30
  with st.form("question_gen"):
31
+ left_column, right_column = st.columns(2)
32
+ num_seq = left_column.slider('Question Count', 0, 10, 3)
33
+ beams = right_column.slider('Beams', 0, 10, 5)
34
+ max_length = st.slider('Max Length', 0, 1024, 300)
35
+ text_input = st.text_area("Input Text", value=default_text)
36
 
37
  submitted = st.form_submit_button("Generate")
38
+ if submitted:
39
+ with st.spinner('Wait for it...'):
40
+ question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
41
+ question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
42
 
43
+ result = q.get_question(text_input, "", question_model, question_tokenizer, num_seq, beams, max_length)
44
+ st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  elif select == "Summarization":
48
  with st.form("summarization"):
49
+ text_input = st.text_area("Input Text", value=default_text)
 
 
 
 
50
 
51
  submitted = st.form_submit_button("Generate")
52
 
 
58
 
59
  elif select == "Fill in the blank":
60
  with st.form("fill_in_the_blank"):
61
+ text_input = st.text_area("Input Text", value=default_text)
62
 
63
  submitted = st.form_submit_button("Generate")
64
 
 
72
 
73
  elif select == "Paraphrasing":
74
  with st.form("paraphrasing"):
 
75
  left_column, right_column = st.columns(2)
76
  count = left_column.slider('Count', 0, 10, 3)
77
  temperature = right_column.slider('Temperature', 0.0, 10.0, 1.5)
78
+ text_input = st.text_area("Input Text", value=default_text2)
79
 
80
  submitted = st.form_submit_button("Generate")
81
 
82
  if submitted:
83
  with st.spinner('Wait for it...'):
84
+ paraphrase_model = PegasusParaphraser(num_return_sequences=count, temperature=temperature)
85
  result = paraphrase_model.paraphrase(text_input)
86
  st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
question_gen.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Defaults used by the app: beams = 5, return_seq = 1, max_length = 300
def get_question(sentence, answer, mdl, tknizer, num_seq, num_beams, max_length):
    """Generate a question from a context sentence and an answer span.

    Builds a ``"context: ... answer: ..."`` prompt for a T5-style
    question-generation model, runs beam search, and returns the top beam.

    Args:
        sentence: Context text the question should be about.
        answer: Answer span the question should target (may be empty).
        mdl: Seq2seq model exposing ``generate`` (e.g. T5ForConditionalGeneration).
        tknizer: Matching tokenizer exposing ``encode_plus`` and ``decode``.
        num_seq: Number of sequences to return; clamped to ``num_beams``
            because ``num_return_sequences`` may not exceed the beam width.
        num_beams: Beam width for beam search.
        max_length: Maximum token length of the generated question.

    Returns:
        The first decoded question with the leading ``"question:"`` tag stripped.
    """
    # generate() rejects num_return_sequences > num_beams; clamp defensively.
    num_seq = min(num_seq, num_beams)

    prompt = "context: {} answer: {}".format(sentence, answer)

    # NOTE: the *input* encoding is capped at 256 tokens independently of the
    # output max_length; longer contexts are silently truncated.
    encoding = tknizer.encode_plus(
        prompt,
        max_length=256,
        padding=False,  # replaces the deprecated pad_to_max_length=False
        truncation=True,
        return_tensors="pt",
    )

    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]

    outs = mdl.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        early_stopping=True,
        num_beams=num_beams,
        num_return_sequences=num_seq,
        no_repeat_ngram_size=2,
        max_length=max_length,
    )

    decoded = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]

    # The model emits "question: <text>"; strip the tag from the top beam.
    question = decoded[0].replace("question:", "")
    return question.strip()