File size: 4,569 Bytes
0a3c18c
40af609
 
bc71c8e
40af609
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e40787
 
15db017
dca4cea
40af609
 
 
 
 
 
 
 
 
 
 
 
 
463ac4a
 
 
 
 
 
 
40af609
 
 
463ac4a
40af609
 
 
 
 
699a98b
40af609
699a98b
40af609
 
463ac4a
 
bc71c8e
699a98b
463ac4a
40af609
 
 
 
a7c634c
40af609
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699a98b
dd23ffe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import streamlit as st
from textwrap3 import wrap
from flashtext import KeywordProcessor
import torch, random, nltk, string, traceback, sys, os, requests, datetime
import numpy as np
import pandas as pd
from transformers import T5ForConditionalGeneration,T5Tokenizer
import pke
from helper import postprocesstext, summarizer, get_nouns_multipartite, get_keywords,\
    get_question, get_related_word, get_final_option_list, load_raw_text


def set_seed(seed: int):
    """Apply one seed to every random number generator used by the app.

    The same ``seed`` is handed, in order, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed the random, NumPy and torch generators with a fixed value each time
# this script runs, so the random draws start from the same state.
set_seed(42)

# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource`` — confirm the pinned Streamlit version before switching.
@st.cache(allow_output_mutation=True)
def load_model():
    """Download the NLTK data and load the summarisation and question models.

    Both T5 models are moved to the GPU when CUDA is available, otherwise they
    stay on the CPU.

    Returns:
        A 4-tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)``. Note the order of the last two items: the question
        *tokenizer* comes before the question *model*.
    """
    # Each corpus is requested once (the original list named 'wordnet' twice).
    for resource in ("punkt", "brown", "wordnet", "stopwords", "omw-1.4"):
        nltk.download(resource)

    # Model names are hard-coded; the environment-variable lookup below is
    # kept for reference only.
    ## summary_mod_name = os.environ["summary_mod_name"]
    ## question_mod_name = os.environ["question_mod_name"]
    summary_mod_name = "mrm8488/t5-base-finetuned-summarize-news"
    question_mod_name = "mrm8488/t5-base-finetuned-question-generation-ap"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model

from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords

# def csv_downloader(df):
#    res = df.to_csv(index=False,sep="\t").encode('utf-8')
#    st.download_button(
#    label="Download logs data as CSV separated by tab",
#    data=res,
#    file_name='df_quiz_log_file_v1.csv',
#    mime='text/csv')

def load_file():
    """Show a ``.txt`` file uploader and return the uploaded text.

    Returns:
        The uploaded file's bytes decoded as UTF-8, or ``None`` when no file
        has been uploaded or the upload's reported type is not
        ``"text/plain"``.
    """
    uploaded_file = st.file_uploader("Paste text",type=['txt'])
    # Guard both "nothing uploaded" and "unexpected MIME type": without the
    # second check the function would reach its return with no text bound and
    # raise UnboundLocalError.
    if uploaded_file is None or uploaded_file.type != "text/plain":
        return None
    return uploaded_file.read().decode("utf-8")


# Load both T5 models and their tokenizers. load_model() returns the question
# tokenizer before the question model, hence the unpacking order here.
summary_model, summary_tokenizer, question_tokenizer, question_model =load_model()

# App title and description
st.title("P's Prototye")
st.write("Get multiple choice questions from random facts")

# Disclaimer shown under the description
st.text("Disclaimer: This is early version. sorry if there's still bugs")

# Text input: the text area is pre-filled with whatever load_raw_text()
# returns (helper imported from helper.py) and is limited to 1000 characters.

default_text = load_raw_text()
raw_text = st.text_area("Enter text here - press Ctrl + enter to submit", default_text, height=100, max_chars=1000, )

# Alternative input path via file upload, currently disabled:
# raw_text = load_file()
import html


def _render_question_html(number, question, options, answer_index, max_options=4):
    """Build the HTML snippet for one multiple-choice question.

    Args:
        number: Label printed in front of the question.
        question: Question text; HTML-escaped before insertion.
        options: Answer options; at most ``max_options`` are rendered, each
            HTML-escaped.
        answer_index: Position in ``options`` of the correct answer, which is
            rendered in green.
        max_options: Upper bound on the number of options shown.

    Returns:
        A string of HTML with no blank lines and no leading indentation.
    """
    lines = [
        "<div>",
        "<p>",
        f"{number}: <b> {html.escape(str(question))} </b>",
        "</p>",
        "</div>",
    ]
    for position, option in enumerate(options[:max_options]):
        style = ' style="color:Green;"' if position == answer_index else ""
        lines.append(f"<p{style}><b> {html.escape(str(option))} </b></p>")
    return "\n".join(lines)


# Generate and display one multiple-choice question per extracted keyword.
if raw_text:
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)
    questions = []
    for idx, ans in enumerate(ans_list):
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        # Skip repeated questions before doing any distractor work for them.
        if ques in questions:
            continue
        questions.append(ques)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)
        # The text comes from user input and model output, so it is escaped
        # inside the helper before being rendered as raw HTML.
        st.markdown(
            _render_question_html(idx + 1, ques, final_options, ans_index),
            unsafe_allow_html=True,
        )
        st.markdown("-----")
    # st.dataframe(pd.read_csv(output_path,sep="\t").tail(5))