"""Streamlit prototype that generates multiple-choice questions from free text.

Flow: load two T5 checkpoints (summarisation and question generation), read a
passage from a text area, summarise it, extract candidate answer keywords, and
render one question with four options per keyword.
"""

import datetime
import os
import random
import string
import sys
import traceback

import nltk
import numpy as np
import pandas as pd
import pke
import requests
import streamlit as st
import torch
from flashtext import KeywordProcessor
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from textwrap3 import wrap
from transformers import T5ForConditionalGeneration, T5Tokenizer

from helper import (
    get_final_option_list,
    get_keywords,
    get_nouns_multipartite,
    get_question,
    get_related_word,
    load_raw_text,
    postprocesstext,
    summarizer,
)


def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


set_seed(42)


# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource``; kept as-is because the installed version is not
# visible from this file.
@st.cache(allow_output_mutation=True)
def load_model():
    """Download the required NLTK resources and load both T5 models.

    The models are moved to CUDA when it is available, otherwise to the CPU.

    Returns:
        A tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)`` -- note that the tokenizer precedes the model for
        the question-generation pair.
    """
    # Each resource is requested once (the original asked for 'wordnet' twice).
    for resource in ("punkt", "brown", "wordnet", "stopwords", "omw-1.4"):
        nltk.download(resource)

    ## summary_mod_name = os.environ["summary_mod_name"]
    ## question_mod_name = os.environ["question_mod_name"]
    summary_mod_name = "mrm8488/t5-base-finetuned-summarize-news"
    question_mod_name = "mrm8488/t5-base-finetuned-question-generation-ap"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model


# def csv_downloader(df):
#     res = df.to_csv(index=False,sep="\t").encode('utf-8')
#     st.download_button(
#         label="Download logs data as CSV separated by tab",
#         data=res,
#         file_name='df_quiz_log_file_v1.csv',
#         mime='text/csv')


def load_file():
    """Read an uploaded ``.txt`` file and return its content as a string.

    Returns:
        The UTF-8 decoded file content when a file with MIME type
        ``text/plain`` has been uploaded, otherwise ``None``.
    """
    uploaded_file = st.file_uploader("Paste text", type=['txt'])
    if uploaded_file is not None and uploaded_file.type == "text/plain":
        return str(uploaded_file.read(), "utf-8")
    # Nothing uploaded (or an unexpected MIME type): there is no text to return.
    return None


# Loading Model
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("P's Prototye")
st.write("Get multiple choice questions from random facts")
st.text("Disclaimer: This is early version. sorry if there's still bugs")

# Load the default passage and let the user edit or replace it
default_text = load_raw_text()
raw_text = st.text_area(
    "Enter text here - press Ctrl + enter to submit",
    default_text,
    height=100,
    max_chars=1000,
)
# raw_text = load_file()

start_time = str(datetime.datetime.now())

# Skip the whole pipeline when there is no text (None or empty string).
if raw_text:
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)
    # print("Ans list: {}".format(ans_list))

    questions = []
    option1 = []
    option2 = []
    option3 = []
    option4 = []

    for idx, ans in enumerate(ans_list):
        # print("IDX: {}, ANS: {}".format(idx, ans))
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)

        # Options are recorded for every keyword, including ones whose
        # question turns out to be a duplicate and is not displayed.
        option1.append(final_options[0])
        option2.append(final_options[1])
        option3.append(final_options[2])
        option4.append(final_options[3])

        # Only render a question the first time it is generated.
        if ques not in questions:
            html_str = f"\n\n{idx+1}: {ques}\n\n"

            # NOTE(review): in the visible source the correct-answer and
            # other-answer templates are identical; presumably markup that
            # distinguished the correct option was lost -- confirm against
            # the original file before relying on this.
            for position in range(4):
                html_str += (
                    f"\n\n{final_options[position]}\n\n"
                    if ans_index == position
                    else f"\n\n{final_options[position]}\n\n"
                )

            html_str += " "
            st.markdown(html_str, unsafe_allow_html=True)
            st.markdown("-----")
            questions.append(ques)

# st.dataframe(pd.read_csv(output_path,sep="\t").tail(5))