Christian Koch committed on
Commit
cd3659c
1 Parent(s): 479b050

further improvements, implement question generator

Browse files
Files changed (2) hide show
  1. app.py +30 -76
  2. question_gen.py +26 -0
app.py CHANGED
@@ -1,84 +1,52 @@
1
  import streamlit as st
2
- from transformers import pipeline, PegasusForConditionalGeneration, PegasusTokenizer, AutoTokenizer, AutoModelForSeq2SeqLM
3
- import nltk
4
 
5
  from fill_in_summary import FillInSummary
6
  from paraphrase import PegasusParaphraser
7
- import question_generator as q
8
 
9
- nltk.download('punkt')
10
 
11
- tokenizer = AutoTokenizer.from_pretrained("google/mt5-small")
 
 
 
 
 
 
 
 
 
 
12
 
13
- model = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-small")
14
 
15
 
16
  st.set_page_config(layout="centered")
17
  st.title('Question Generator by Eddevs')
 
18
 
19
- select = st.selectbox('Type', ['Question Generator', 'Paraphrasing', 'Summarization', 'Fill in the blank'])
20
-
21
 
22
  if select == "Question Generator":
23
  with st.form("question_gen"):
24
- # left_column, right_column = st.columns(2)
25
- # left_column.selectbox('Type', ['Question Generator', 'Paraphrasing'])
26
- #st.selectbox('Model', ['T5', 'GPT Neo-X'])
27
-
28
- text_input = st.text_area("Input Text")
29
 
30
  submitted = st.form_submit_button("Generate")
 
 
 
 
31
 
32
- split = st.checkbox('Split into sentences', value=True)
33
-
34
- if split:
35
- # Split into sentences
36
- sent_tokenized = nltk.sent_tokenize(text_input)
37
- res = {}
38
-
39
- with st.spinner('Please wait while the inputs are being processed...'):
40
- # Iterate over sentences
41
- for sentence in sent_tokenized:
42
- predictions = model.multitask([sentence], max_length=512)
43
- questions, answers, answers_bis = predictions['questions'], predictions['answers'], predictions[
44
- 'answers_bis']
45
-
46
- # Build answer dict
47
- content = {}
48
- for question, answer, answer_bis in zip(questions[0], answers[0], answers_bis[0]):
49
- content[question] = {'answer (extracted)': answer, 'answer (generated)': answer_bis}
50
- res[sentence] = content
51
-
52
- # Answer area
53
- st.write(res)
54
-
55
- else:
56
- with st.spinner('Please wait while the inputs are being processed...'):
57
- # Prediction
58
- predictions = model.multitask([text_input], max_length=512)
59
- questions, answers, answers_bis = predictions['questions'], predictions['answers'], predictions[
60
- 'answers_bis']
61
-
62
- # Answer area
63
- zip = zip(questions[0], answers[0], answers_bis[0])
64
- content = {}
65
- for question, answer, answer_bis in zip:
66
- content[question] = {'answer (extracted)': answer, 'answer (generated)': answer_bis}
67
-
68
- st.write(content)
69
- if submitted:
70
- with st.spinner('Wait for it...'):
71
- result = FillInSummary().summarize(text_input)
72
- st.write(text_input)
73
 
74
 
75
  elif select == "Summarization":
76
  with st.form("summarization"):
77
- # left_column, right_column = st.columns(2)
78
- # left_column.selectbox('Type', ['Question Generator', 'Paraphrasing'])
79
- #st.selectbox('Model', ['T5', 'GPT Neo-X'])
80
-
81
- text_input = st.text_area("Input Text")
82
 
83
  submitted = st.form_submit_button("Generate")
84
 
@@ -90,7 +58,7 @@ elif select == "Summarization":
90
 
91
  elif select == "Fill in the blank":
92
  with st.form("fill_in_the_blank"):
93
- text_input = st.text_area("Input Text")
94
 
95
  submitted = st.form_submit_button("Generate")
96
 
@@ -104,29 +72,15 @@ elif select == "Fill in the blank":
104
 
105
  elif select == "Paraphrasing":
106
  with st.form("paraphrasing"):
107
- # st.selectbox('Model', ['T5', 'GPT Neo-X'])
108
  left_column, right_column = st.columns(2)
109
  count = left_column.slider('Count', 0, 10, 3)
110
  temperature = right_column.slider('Temperature', 0.0, 10.0, 1.5)
111
- text_input = st.text_area("Input Text")
112
 
113
  submitted = st.form_submit_button("Generate")
114
 
115
  if submitted:
116
  with st.spinner('Wait for it...'):
117
- paraphrase_model = PegasusParaphraser(num_return_sequences=count,temperature=temperature)
118
  result = paraphrase_model.paraphrase(text_input)
119
  st.write(result)
120
-
121
-
122
-
123
-
124
-
125
- #if st.button('Generate'):
126
- # st.write(input)
127
- #st.success("We have generated 105 Questions for you")
128
- # st.snow()
129
- ##else:
130
- ##nothing here
131
-
132
-
 
1
  import streamlit as st
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
 
3
 
4
  from fill_in_summary import FillInSummary
5
  from paraphrase import PegasusParaphraser
6
+ import question_gen as q
7
 
 
8
 
9
+ default_text = "Apple was founded as Apple Computer Company on April 1, 1976, by Steve Jobs, Steve Wozniak and Ronald " \
10
+ "Wayne to develop and sell Wozniak's Apple I personal computer. It was incorporated by Jobs and " \
11
+ "Wozniak as Apple Computer, Inc. in 1977 and the company's next computer, the Apple II became a best " \
12
+ "seller. Apple went public in 1980, to instant financial success. The company went onto develop new " \
13
+ "computers featuring innovative graphical user interfaces, including the original Macintosh, " \
14
+ "announced in a critically acclaimed advertisement, '1984', directed by Ridley Scott. By 1985, " \
15
+ "the high cost of its products and power struggles between executives caused problems. Wozniak stepped " \
16
+ "back from Apple amicably, while Jobs resigned to found NeXT, taking some Apple employees with him. "
17
+
18
+ default_text2 = "The board of directors instructed Sculley to contain Jobs and his ability to launch expensive forays " \
19
+ "into untested products "
20
 
 
21
 
22
 
23
  st.set_page_config(layout="centered")
24
  st.title('Question Generator by Eddevs')
25
+ st.write('Please select the task you want to do.')
26
 
27
+ select = st.selectbox('Type', ['Question Generator', 'Paraphrasing', 'Summarization', 'Fill in the blank'])
 
28
 
29
  if select == "Question Generator":
30
  with st.form("question_gen"):
31
+ left_column, right_column = st.columns(2)
32
+ num_seq = left_column.slider('Question Count', 0, 10, 3)
33
+ beams = right_column.slider('Beams', 0, 10, 5)
34
+ max_length = st.slider('Max Length', 0, 1024, 300)
35
+ text_input = st.text_area("Input Text", value=default_text)
36
 
37
  submitted = st.form_submit_button("Generate")
38
+ if submitted:
39
+ with st.spinner('Wait for it...'):
40
+ question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
41
+ question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
42
 
43
+ result = q.get_question(text_input, "", question_model, question_tokenizer, num_seq, beams, max_length)
44
+ st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  elif select == "Summarization":
48
  with st.form("summarization"):
49
+ text_input = st.text_area("Input Text", value=default_text)
 
 
 
 
50
 
51
  submitted = st.form_submit_button("Generate")
52
 
 
58
 
59
  elif select == "Fill in the blank":
60
  with st.form("fill_in_the_blank"):
61
+ text_input = st.text_area("Input Text", value=default_text)
62
 
63
  submitted = st.form_submit_button("Generate")
64
 
 
72
 
73
  elif select == "Paraphrasing":
74
  with st.form("paraphrasing"):
 
75
  left_column, right_column = st.columns(2)
76
  count = left_column.slider('Count', 0, 10, 3)
77
  temperature = right_column.slider('Temperature', 0.0, 10.0, 1.5)
78
+ text_input = st.text_area("Input Text", value=default_text2)
79
 
80
  submitted = st.form_submit_button("Generate")
81
 
82
  if submitted:
83
  with st.spinner('Wait for it...'):
84
+ paraphrase_model = PegasusParaphraser(num_return_sequences=count, temperature=temperature)
85
  result = paraphrase_model.paraphrase(text_input)
86
  st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
question_gen.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Defaults used by the app: beams = 5, return_seq = 1, max_length = 300
def get_question(sentence, answer, mdl, tknizer, num_seq, num_beams, max_length):
    """Generate a question from a context sentence and an answer span.

    Builds a ``"context: ... answer: ..."`` prompt for a T5-style
    question-generation model, runs beam search, and returns the top beam.

    Args:
        sentence: Context text the question should be about.
        answer: Answer span the question should target (may be empty).
        mdl: Seq2seq model exposing ``generate`` (e.g. T5ForConditionalGeneration).
        tknizer: Matching tokenizer exposing ``encode_plus`` and ``decode``.
        num_seq: Number of sequences to return; clamped to ``num_beams``
            because ``num_return_sequences`` may not exceed the beam width.
        num_beams: Beam width for beam search.
        max_length: Maximum token length of the generated question.

    Returns:
        The first decoded question with the leading ``"question:"`` tag stripped.
    """
    # generate() rejects num_return_sequences > num_beams; clamp defensively.
    num_seq = min(num_seq, num_beams)

    prompt = "context: {} answer: {}".format(sentence, answer)

    # NOTE: the *input* encoding is capped at 256 tokens independently of the
    # output max_length; longer contexts are silently truncated.
    encoding = tknizer.encode_plus(
        prompt,
        max_length=256,
        padding=False,  # replaces the deprecated pad_to_max_length=False
        truncation=True,
        return_tensors="pt",
    )

    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]

    outs = mdl.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        early_stopping=True,
        num_beams=num_beams,
        num_return_sequences=num_seq,
        no_repeat_ngram_size=2,
        max_length=max_length,
    )

    decoded = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]

    # The model emits "question: <text>"; strip the tag from the top beam.
    question = decoded[0].replace("question:", "")
    return question.strip()