Spaces:

DevBM
/

QGen

Sleeping

App Files Files Community

DevBM committed on Oct 16, 2024

Commit

e84f648

verified ·

1 Parent(s): 240f2d4

Reverting to Jul19 Commit

Browse files

Files changed (1) hide show

app.py +614 -28

app.py CHANGED Viewed

@@ -1,10 +1,50 @@
 import nltk
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
 nltk.download('wordnet')
-import streamlit as st
 st.set_page_config(
     page_icon='cyclone',
@@ -15,19 +55,62 @@ st.set_page_config(
     }
 )
-from text_processing import clean_text, get_pdf_text
-from question_generation import generate_questions_async
-from visualization import display_word_cloud
-from data_export import export_to_csv, export_to_pdf
-from feedback import collect_feedback, analyze_feedback, export_feedback_data
-from utils import get_session_id, initialize_state, get_state, set_state, display_info, QuestionGenerationError, entity_linking
-import asyncio
-import time
-import pandas as pd
-from data_export import send_email_with_attachment
-st.set_option('deprecation.showPyplotGlobalUse',False)
 with st.sidebar:
     select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
@@ -35,8 +118,514 @@ if select_model == "T5-large":
     modelname = "DevBM/t5-large-squad"
 elif select_model == "T5-small":
     modelname = "AneriThakkar/flan-t5-small-finetuned"
 def main():
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
     state = initialize_state(session_id)
@@ -44,18 +633,18 @@ def main():
         st.session_state.feedback_data = []
     with st.sidebar:
-        show_info = st.toggle('Show Info',False)
         if show_info:
             display_info()
         st.subheader("Customization Options")
         # Customization options
         input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
         with st.expander("Choose the Additional Elements to show"):
-            show_context = st.checkbox("Context",False)
             show_answer = st.checkbox("Answer",True)
-            show_options = st.checkbox("Options",True)
             show_entity_link = st.checkbox("Entity Link For Wikipedia",True)
-            show_qa_scores = st.checkbox("QA Score",True)
             show_blank_question = st.checkbox("Fill in the Blank Questions",True)
         num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
         context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
@@ -81,15 +670,15 @@ def main():
         text = clean_text(text)
     with st.expander("Show text"):
         st.write(text)
-        # st.text(text)
     generate_questions_button = st.button("Generate Questions",help="This is the generate questions button")
     # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
     if generate_questions_button and text:
         start_time = time.time()
         with st.spinner("Generating questions..."):
             try:
-                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords,modelname))
                 if not state['generated_questions']:
                     st.warning("No questions were generated. The text might be too short or lack suitable content.")
                 else:
@@ -150,16 +739,12 @@ def main():
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']:
-            with st.sidebar:
-                # Adding error handling while exporting the files
-                # ---------------------------------------------------------------------
-                try:
-                    csv_data = export_to_csv(state['generated_questions'])
-                    st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
-                    pdf_data = export_to_pdf(state['generated_questions'])
-                    st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
-                except Exception as e:
-                    st.error(f"Error exporting CSV: {e}")
             with st.expander("View Visualizations"):
                 questions = [tpl['question'] for tpl in state['generated_questions']]
@@ -170,6 +755,7 @@ def main():
                 overall_scores = pd.DataFrame(overall_scores,columns=['Overall Scores'])
                 st.line_chart(overall_scores)
     # View Feedback Statistics
     with st.expander("View Feedback Statistics"):
         analyze_feedback()

+import streamlit as st
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+import spacy
 import nltk
+from sklearn.feature_extraction.text import TfidfVectorizer
+from rake_nltk import Rake
+import pandas as pd
+from fpdf import FPDF
+import wikipediaapi
+from functools import lru_cache
 nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('brown')
+from nltk.tokenize import sent_tokenize
 nltk.download('wordnet')
+from nltk.corpus import wordnet
+import random
+import sense2vec
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+import json
+import os
+from sentence_transformers import SentenceTransformer, util
+import textstat
+from spellchecker import SpellChecker
+from transformers import pipeline
+import re
+import pymupdf
+import uuid
+import time
+import asyncio
+import aiohttp
+from datetime import datetime
+import base64
+from io import BytesIO
+# '-----------------'
+import smtplib
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+from email.mime.base import MIMEBase
+from email.mime.application import MIMEApplication
+from email import encoders
+# '------------------'
+from gliner import GLiNER
+# -------------------
+print("***************************************************************")
 st.set_page_config(
     page_icon='cyclone',
     }
 )
+st.set_option('deprecation.showPyplotGlobalUse',False)
+class QuestionGenerationError(Exception):
+    """Signals a failure in keyword extraction, question generation or option generation."""
+# English Wikipedia client identified by a custom user agent; entity_linking queries it.
+user_agent = 'QGen/1.2'
+wiki_wiki = wikipediaapi.Wikipedia(
+    user_agent=user_agent,
+    language='en',
+)
+def get_session_id():
+    """Return the id kept in ``st.session_state.session_id``.
+
+    A random UUID4 string is generated and stored the first time this is called
+    for a session that has no id yet.
+    """
+    session = st.session_state
+    if 'session_id' in session:
+        return session.session_id
+    session.session_id = str(uuid.uuid4())
+    return session.session_id
+def initialize_state(session_id):
+    """Return the state dict kept for ``session_id``, creating it when missing.
+
+    States live in ``st.session_state.session_states``, keyed by session id. A
+    new entry starts with an empty ``'generated_questions'`` list.
+    """
+    if 'session_states' not in st.session_state:
+        st.session_state.session_states = {}
+    all_states = st.session_state.session_states
+    if session_id not in all_states:
+        # add other state variables as needed
+        all_states[session_id] = {'generated_questions': []}
+    return all_states[session_id]
+def get_state(session_id):
+    """Return the entry of ``st.session_state.session_states`` for ``session_id``."""
+    all_states = st.session_state.session_states
+    return all_states[session_id]
+def set_state(session_id, key, value):
+    """Store ``value`` under ``key`` in the state dict of ``session_id``."""
+    session_values = st.session_state.session_states[session_id]
+    session_values[key] = value
+@st.cache_resource
+def load_model(modelname):
+    """Load the T5 model and tokenizer published under ``modelname``.
+
+    Returns:
+        A ``(model, tokenizer)`` pair.
+    """
+    model = T5ForConditionalGeneration.from_pretrained(modelname)
+    tokenizer = T5Tokenizer.from_pretrained(modelname)
+    return model, tokenizer
+# Load the spaCy pipeline and the sense2vec vectors
+@st.cache_resource
+def load_nlp_models():
+    """Return ``(nlp, s2v)``: the ``en_core_web_md`` spaCy pipeline and the sense2vec data read from ``s2v_old``."""
+    spacy_pipeline = spacy.load("en_core_web_md")
+    sense_vectors = sense2vec.Sense2Vec().from_disk('s2v_old')
+    return spacy_pipeline, sense_vectors
+# Load Quality Assurance Models
+@st.cache_resource
+def load_qa_models():
+    """Return ``(similarity_model, spell)``: the ``all-MiniLM-L6-v2`` sentence-similarity model and a spell checker."""
+    return SentenceTransformer('all-MiniLM-L6-v2'), SpellChecker()
 with st.sidebar:
     select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
     modelname = "DevBM/t5-large-squad"
 elif select_model == "T5-small":
     modelname = "AneriThakkar/flan-t5-small-finetuned"
+# Load the shared models at module level; the functions below read these
+# names (nlp, s2v, spell, context_model, model, tokenizer) as globals.
+nlp, s2v = load_nlp_models()
+similarity_model, spell = load_qa_models()
+# The option generators use the sentence-similarity model under this name.
+context_model = similarity_model
+model, tokenizer = load_model(modelname)
+# Info Section
+def display_info():
+    """Render the static "Information" title and feature overview in the sidebar."""
+    info_markdown = """
+        ### Question Generator System
+        This system is designed to generate questions based on the provided context. It uses various NLP techniques and models to:
+        - Extract keywords from the text
+        - Map keywords to sentences
+        - Generate questions
+        - Provide multiple choice options
+        - Assess the quality of generated questions
+        #### Key Features:
+        - **Keyword Extraction:** Combines RAKE, TF-IDF, and spaCy for comprehensive keyword extraction.
+        - **Question Generation:** Utilizes a pre-trained T5 model for generating questions.
+        - **Options Generation:** Creates contextually relevant multiple-choice options.
+        - **Question Assessment:** Scores questions based on relevance, complexity, and spelling correctness.
+        - **Feedback Collection:** Allows users to rate the generated questions and provides statistics on feedback.
+        #### Customization Options:
+        - Number of beams for question generation
+        - Context window size for mapping keywords to sentences
+        - Number of questions to generate
+        - Additional display elements (context, answer, options, entity link, QA scores)
+        #### Outputs:
+        - Generated questions with multiple-choice options
+        - Download options for CSV and PDF formats
+        - Visualization of overall scores
+    """
+    st.sidebar.title("Information")
+    st.sidebar.markdown(info_markdown)
+def get_pdf_text(pdf_file):
+    """Extract the text of every page of a PDF.
+
+    Args:
+        pdf_file: Binary file-like object holding the PDF; it is read in full.
+
+    Returns:
+        The text of all pages concatenated in page order.
+    """
+    # Open the document as a context manager so it is closed even when a page
+    # fails to load; the original never closed it.
+    with pymupdf.open(stream=pdf_file.read(), filetype="pdf") as doc:
+        return "".join(
+            doc.load_page(page_num).get_text()
+            for page_num in range(doc.page_count)
+        )
+def save_feedback_og(question, answer, rating, options, context):
+    """Append one feedback record to ``question_feedback.json``.
+
+    The file is read if it exists, the new record is appended to the stored
+    list, the whole list is printed and written back.
+
+    Returns:
+        The name of the feedback file.
+    """
+    feedback_file = 'question_feedback.json'
+    record = {
+        'question' : question,
+        'answer' : answer,
+        'context' : context,
+        'options' : options,
+        'rating' : rating,
+    }
+    feedback_data = []
+    if os.path.exists(feedback_file):
+        with open(feedback_file, 'r') as source:
+            feedback_data = json.load(source)
+    feedback_data.append(record)
+    print(feedback_data)
+    with open(feedback_file, 'w') as sink:
+        json.dump(feedback_data, sink)
+    return feedback_file
+# -----------------------------------------------------------------------------------------
+def send_email_with_attachment(email_subject, email_body, recipient_emails, sender_email, sender_password, attachment):
+    """Send a plain-text e-mail, optionally with a JSON attachment, over SMTP with STARTTLS.
+
+    Args:
+        email_subject: Value of the Subject header.
+        email_body: Plain-text message body.
+        recipient_emails: Iterable of recipient address strings.
+        sender_email: Address used for the From header and the SMTP login.
+        sender_password: Password used for the SMTP login.
+        attachment: Object whose ``getvalue()`` returns the attachment bytes,
+            or a falsy value to send no attachment.
+
+    Returns:
+        True when the message was handed to the server, False on any failure
+        (the error is shown with ``st.error``).
+    """
+    smtp_server = "smtp.gmail.com"  # Replace with your SMTP server
+    smtp_port = 587  # Replace with your SMTP port
+    # Create the email message
+    message = MIMEMultipart()
+    message['From'] = sender_email
+    message['To'] = ", ".join(recipient_emails)
+    message['Subject'] = email_subject
+    message.attach(MIMEText(email_body, 'plain'))
+    # Attach the feedback data if available
+    if attachment:
+        attachment_part = MIMEApplication(attachment.getvalue(), Name="feedback_data.json")
+        attachment_part['Content-Disposition'] = 'attachment; filename="feedback_data.json"'
+        message.attach(attachment_part)
+    # Send the email
+    try:
+        with smtplib.SMTP(smtp_server, smtp_port) as server:
+            server.starttls()
+            # Credentials are deliberately not printed or logged.
+            server.login(sender_email, sender_password)
+            server.sendmail(sender_email, recipient_emails, message.as_string())
+        return True
+    except Exception as e:
+        st.error(f"Failed to send email: {str(e)}")
+        return False
+# ----------------------------------------------------------------------------------
+def collect_feedback(i,question, answer, context, options):
+    """Render the feedback form for one question and store the submission.
+
+    Shows an editable copy of the question, five 1-5 rating sliders and a
+    comment box. When "Submit Feedback" is pressed the values are passed to
+    ``save_feedback`` as one dict. ``i`` makes the widget keys unique;
+    ``context`` is accepted but not stored in the record.
+    """
+    st.write("Please provide feedback for this question:")
+    edited_question = st.text_input("Enter improved question",value=question,key=f'fdx1{i}')
+    # (record field, slider label, help text, widget-key slot), in display order.
+    slider_specs = (
+        ("clarity", "Clarity", "1 = Very unclear, 5 = Very clear", 2),
+        ("difficulty", "Difficulty", "1 = Very easy, 5 = Very difficult", 3),
+        ("relevance", "Relevance", "1 = Not relevant, 5 = Highly relevant", 4),
+        ("option_quality", "Quality of Options", "1 = Poor options, 5 = Excellent options", 5),
+        ("overall_rating", "Overall Rating", "1 = Poor, 5 = Excellent", 6),
+    )
+    ratings = {
+        field: st.slider(label, 1, 5, 3, help=hint, key=f'fdx{slot}{i}')
+        for field, label, hint, slot in slider_specs
+    }
+    comments = st.text_input("Additional Comments", "",key=f'fdx7{i}')
+    if not st.button("Submit Feedback",key=f'fdx8{i}'):
+        return
+    feedback = {
+        "question": question,
+        'edited_question':edited_question,
+        "answer": answer,
+        "options": options,
+        **ratings,
+        "comments": comments
+    }
+    save_feedback(feedback)
+    st.success("Thank you for your feedback!")
+def save_feedback(feedback):
+    """Append ``feedback`` to the list held in ``st.session_state.feedback_data``."""
+    collected = st.session_state.feedback_data
+    collected.append(feedback)
+def analyze_feedback():
+    """Display statistics for the feedback in ``st.session_state.feedback_data``.
+
+    Shows a warning and returns when no feedback exists. Otherwise renders one
+    bar chart per rating metric, the mean of each metric and, when the
+    comments contain usable text, a word cloud of all comments.
+    """
+    if not st.session_state.feedback_data:
+        st.warning("No feedback data available yet.")
+        return
+    df = pd.DataFrame(st.session_state.feedback_data)
+    st.write("Feedback Analysis")
+    st.write(f"Total feedback collected: {len(df)}")
+    metrics = ['clarity', 'difficulty', 'relevance', 'option_quality', 'overall_rating']
+    for metric in metrics:
+        fig, ax = plt.subplots()
+        df[metric].value_counts().sort_index().plot(kind='bar', ax=ax)
+        ax.set_title(f"Distribution of {metric.capitalize()} Ratings")
+        ax.set_xlabel("Rating")
+        ax.set_ylabel("Count")
+        st.pyplot(fig)
+        # Close the figure once rendered so figures do not pile up in pyplot.
+        plt.close(fig)
+    st.write("Average Ratings:")
+    st.write(df[metrics].mean())
+    # Word cloud of comments
+    comments = " ".join(df['comments'])
+    # Several empty comments join into a whitespace-only string longer than one
+    # character, so test the stripped text instead of the raw length.
+    if len(comments.strip()) > 1:
+        try:
+            wordcloud = WordCloud(width=800, height=400, background_color='white').generate(comments)
+        except ValueError:
+            # NOTE(review): WordCloud raises ValueError when no words remain
+            # after its own filtering; the cloud is optional, so skip it.
+            return
+        fig, ax = plt.subplots()
+        ax.imshow(wordcloud, interpolation='bilinear')
+        ax.axis("off")
+        st.pyplot(fig)
+        plt.close(fig)
+def export_feedback_data():
+    """Return the collected feedback as an in-memory JSON file.
+
+    Returns:
+        A ``BytesIO`` positioned at the start and holding the feedback list as
+        indented JSON, or ``None`` (after a warning) when there is no feedback.
+    """
+    feedback = st.session_state.feedback_data
+    if not feedback:
+        st.warning("No feedback data available.")
+        return None
+    # Serialise to JSON text, then wrap the encoded bytes in a readable buffer.
+    payload = json.dumps(feedback, indent=2).encode()
+    return BytesIO(payload)
+# Function to clean text
+def clean_text(text):
+    """Replace every non-ASCII character and every newline in ``text`` with a space."""
+    ascii_only = re.sub(r"[^\x00-\x7F]", " ", text)
+    return ascii_only.replace("\n", " ")
+# Function to create text chunks
+def segment_text(text, max_segment_length=700, batch_size=7):
+    """Split ``text`` into sentence-aligned segments and group them into batches.
+
+    Sentences are appended to the current segment while its length stays
+    within ``max_segment_length`` characters; otherwise a new segment starts.
+    A sentence longer than the limit becomes a segment of its own.
+
+    Args:
+        text: Text to split.
+        max_segment_length: Maximum accumulated length of a segment.
+        batch_size: Number of segments per batch.
+
+    Returns:
+        A list of batches, each a list of segment strings.
+    """
+    segments = []
+    current_segment = ""
+    for sentence in sent_tokenize(text):
+        if len(current_segment) + len(sentence) <= max_segment_length:
+            current_segment += sentence + " "
+            continue
+        # Flush only a non-empty segment: when the very first sentence is
+        # already over the limit there is nothing to flush yet.
+        if current_segment:
+            segments.append(current_segment.strip())
+        current_segment = sentence + " "
+    if current_segment:
+        segments.append(current_segment.strip())
+    # Create batches
+    return [segments[i:i + batch_size] for i in range(0, len(segments), batch_size)]
+@st.cache_resource
+def _load_gliner_model():
+    """Load the GLiNER entity model through the same resource cache as the other models."""
+    return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
+# Function to extract keywords using combined techniques
+def extract_keywords(text, extract_all):
+    """Extract candidate answer keywords from ``text``.
+
+    Args:
+        text: Text to analyse.
+        extract_all: When exactly ``False`` only GLiNER entities are returned;
+            otherwise spaCy entities and content words, RAKE phrases and
+            TF-IDF terms are merged in as well.
+
+    Returns:
+        A list of unique keyword strings.
+
+    Raises:
+        QuestionGenerationError: If any extraction step fails; the original
+            exception is chained as the cause.
+    """
+    try:
+        # Reuse the cached model instead of reloading it on every call.
+        gliner_model = _load_gliner_model()
+        labels = ["person", "organization", "email", "Award", "Date", "Competitions", "Teams", "location", "percentage", "money"]
+        entities = gliner_model.predict_entities(text, labels, threshold=0.7)
+        gliner_keywords = list({ent["text"] for ent in entities})
+        print(f"Gliner keywords:{gliner_keywords}")
+        # Use Only Gliner Entities
+        if extract_all is False:
+            return gliner_keywords
+        doc = nlp(text)
+        spacy_keywords = {ent.text for ent in doc.ents}
+        print(f"\n\nSpacy Entities: {spacy_keywords} \n\n")
+        # Use RAKE
+        rake = Rake()
+        rake.extract_keywords_from_text(text)
+        rake_keywords = set(rake.get_ranked_phrases())
+        print(f"\n\nRake Keywords: {rake_keywords} \n\n")
+        # Use spaCy for NER and POS tagging
+        spacy_keywords.update(token.text for token in doc if token.pos_ in ["NOUN", "PROPN", "VERB", "ADJ"])
+        print(f"\n\nSpacy Keywords: {spacy_keywords} \n\n")
+        # Use TF-IDF
+        vectorizer = TfidfVectorizer(stop_words='english')
+        vectorizer.fit_transform([text])
+        tfidf_keywords = set(vectorizer.get_feature_names_out())
+        print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")
+        # Combine all keywords
+        combined_keywords = rake_keywords.union(spacy_keywords, tfidf_keywords, gliner_keywords)
+        return list(combined_keywords)
+    except Exception as e:
+        raise QuestionGenerationError(f"Error in keyword extraction: {str(e)}") from e
+def get_similar_words_sense2vec(word, n=3):
+    """Return up to ``n`` sense2vec neighbours of ``word`` with the sense tag removed.
+
+    The ``word|NOUN`` key is tried first, then the bare word; an empty list is
+    returned when neither key is in the vectors.
+    """
+    for key in (word + "|NOUN", word):
+        if key in s2v:
+            neighbours = s2v.most_similar(key, n=n)
+            return [neighbour.split("|")[0] for neighbour, _ in neighbours]
+    return []
+def get_synonyms(word, n=3):
+    """Collect WordNet lemma names for ``word``, stopping once ``n`` distinct names are found.
+
+    Names equal to ``word`` and names already collected are skipped.
+    """
+    found = []
+    for synset in wordnet.synsets(word):
+        for lemma in synset.lemmas():
+            name = lemma.name()
+            if name == word or name in found:
+                continue
+            found.append(name)
+            if len(found) == n:
+                return found
+    return found
+def generate_options(answer, context, n=3):
+    """Build a shuffled list of up to ``n + 1`` unique options that includes ``answer``.
+
+    Distractors are taken, in priority order, from: the ``n`` context words
+    most similar to the answer, sense2vec neighbours, WordNet synonyms, named
+    entities of the context and finally random remaining context words. Each
+    source is consulted only while options are still missing, and duplicates
+    are dropped as they are added, so they no longer hide a shortage from the
+    fallbacks.
+
+    Args:
+        answer: The correct answer; always part of the result.
+        context: Text the distractors are drawn from.
+        n: Number of distractors wanted.
+
+    Returns:
+        The options in random order.
+    """
+    wanted = n + 1
+    options = [answer]
+    def add_unique(candidates):
+        # Append unseen candidates in order until the target count is reached.
+        for candidate in candidates:
+            if len(options) >= wanted:
+                break
+            if candidate not in options:
+                options.append(candidate)
+    # Parse the context once; tokens and entities are both read from this doc.
+    doc = nlp(context)
+    answer_lower = answer.lower()
+    context_words = [token.text for token in doc if token.is_alpha and token.text.lower() != answer_lower]
+    # Compute similarity scores and sort context words
+    answer_embedding = context_model.encode(answer)
+    similarity_scores = [util.pytorch_cos_sim(context_model.encode(word), answer_embedding).item() for word in context_words]
+    sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
+    add_unique(sorted_context_words[:n])
+    # Try to get similar words based on sense2vec
+    if len(options) < wanted:
+        add_unique(get_similar_words_sense2vec(answer, n))
+    # If we don't have enough options, try synonyms
+    if len(options) < wanted:
+        add_unique(get_synonyms(answer, n))
+    # If we still don't have enough options, extract other entities from the context
+    if len(options) < wanted:
+        add_unique(ent.text for ent in doc.ents if ent.text.lower() != answer_lower)
+    # If we still need more options, add some random words from the context
+    if len(options) < wanted:
+        leftovers = [word for word in dict.fromkeys(context_words) if word not in options]
+        add_unique(random.sample(leftovers, min(wanted - len(options), len(leftovers))))
+    print(f"\n\nAll Possible Options: {options}\n\n")
+    # Shuffle the options
+    random.shuffle(options)
+    return options
+# Function to map keywords to sentences with customizable context window size
+def map_keywords_to_sentences(text, keywords, context_window_size):
+    """Map each keyword occurring in ``text`` to the sentences around its occurrences.
+
+    For every sentence that contains the keyword (substring match), that
+    sentence and up to ``context_window_size`` sentences before it are joined
+    into one context. Contexts of several occurrences are concatenated with
+    spaces. Keywords that never occur get no entry.
+    """
+    sentences = sent_tokenize(text)
+    print(f"\n\nSentences: {sentences}\n\n")
+    keyword_sentence_mapping = {}
+    for keyword in keywords:
+        contexts = [
+            ' '.join(sentences[max(0, position - context_window_size):position + 1])
+            for position, sentence in enumerate(sentences)
+            if keyword in sentence
+        ]
+        if not contexts:
+            continue
+        combined = ' '.join(contexts)
+        if keyword in keyword_sentence_mapping:
+            keyword_sentence_mapping[keyword] += ' ' + combined
+        else:
+            keyword_sentence_mapping[keyword] = combined
+    return keyword_sentence_mapping
+# Function to perform entity linking using Wikipedia API
+@lru_cache(maxsize=128)
+def entity_linking(keyword):
+    """Return the full URL of the Wikipedia page for ``keyword``, or ``None`` if the page does not exist.
+
+    Results are memoised for up to 128 distinct keywords.
+    """
+    page = wiki_wiki.page(keyword)
+    return page.fullurl if page.exists() else None
+async def generate_question_async(context, answer, num_beams):
+    """Generate one question for ``answer`` given ``context``.
+
+    The prompt ``<context> ... <answer> ...`` is tokenised and passed to
+    ``model.generate`` in a worker thread; the first output sequence is
+    decoded and returned.
+
+    Raises:
+        QuestionGenerationError: If tokenising, generating or decoding fails.
+    """
+    try:
+        input_text = f"<context> {context} <answer> {answer}"
+        print(f"\n{input_text}\n")
+        encoded_prompt = tokenizer.encode(input_text, return_tensors='pt')
+        generation_options = {'num_beams': num_beams, 'early_stopping': True, 'max_length': 250}
+        generated = await asyncio.to_thread(model.generate, encoded_prompt, **generation_options)
+        question = tokenizer.decode(generated[0], skip_special_tokens=True)
+        print(f"\n{question}\n")
+        return question
+    except Exception as e:
+        raise QuestionGenerationError(f"Error in question generation: {str(e)}")
+async def generate_options_async(answer, context, n=3):
+    """Build a shuffled list of up to ``n + 1`` unique options that includes ``answer``.
+
+    Distractors come, in priority order, from the ``n`` context words most
+    similar to the answer, sense2vec neighbours and WordNet synonyms. Each
+    source is consulted only while options are still missing, and duplicates
+    are dropped as they are added so they cannot mask a shortage. Model and
+    lookup calls run in worker threads.
+
+    Raises:
+        QuestionGenerationError: If any step fails; the original exception is
+            chained as the cause.
+    """
+    try:
+        wanted = n + 1
+        options = [answer]
+        def add_unique(candidates):
+            # Append unseen candidates in order until the target count is reached.
+            for candidate in candidates:
+                if len(options) >= wanted:
+                    break
+                if candidate not in options:
+                    options.append(candidate)
+        # Only the answer embedding is needed; the whole-context embedding the
+        # original computed was never used.
+        answer_embedding = await asyncio.to_thread(context_model.encode, answer)
+        context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
+        # Compute similarity scores and sort context words
+        similarity_scores = [util.pytorch_cos_sim(await asyncio.to_thread(context_model.encode, word), answer_embedding).item() for word in context_words]
+        sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
+        add_unique(sorted_context_words[:n])
+        # Try to get similar words based on sense2vec
+        if len(options) < wanted:
+            add_unique(await asyncio.to_thread(get_similar_words_sense2vec, answer, n))
+        # If we don't have enough options, try synonyms
+        if len(options) < wanted:
+            add_unique(await asyncio.to_thread(get_synonyms, answer, n))
+        # Shuffle the options
+        random.shuffle(options)
+        return options
+    except Exception as e:
+        raise QuestionGenerationError(f"Error in generating options: {str(e)}") from e
+# Function to generate questions using beam search
+async def generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords):
+    """Generate up to ``num_questions`` question records for ``text``.
+
+    The text is segmented into batches and keywords are extracted once for the
+    whole text. Batches are processed in order until enough questions exist,
+    with a progress bar and status line updated after each batch.
+
+    Returns:
+        A list of at most ``num_questions`` question dicts, or an empty list
+        when an error occurred (the error is shown with ``st.error``).
+    """
+    progress_bar = None
+    status_text = None
+    try:
+        batches = segment_text(text)
+        keywords = extract_keywords(text, extract_all_keywords)
+        all_questions = []
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        for i, batch in enumerate(batches):
+            status_text.text(f"Processing batch {i+1} of {len(batches)}...")
+            batch_questions = await process_batch(batch, keywords, context_window_size, num_beams)
+            all_questions.extend(batch_questions)
+            progress_bar.progress((i + 1) / len(batches))
+            if len(all_questions) >= num_questions:
+                break
+        return all_questions[:num_questions]
+    except QuestionGenerationError as e:
+        st.error(f"An error occurred during question generation: {str(e)}")
+        return []
+    except Exception as e:
+        st.error(f"An unexpected error occurred: {str(e)}")
+        return []
+    finally:
+        # Remove the progress widgets on every exit path, so a failure does not
+        # leave a stale "Processing batch ..." line on the page.
+        for widget in (progress_bar, status_text):
+            if widget is not None:
+                widget.empty()
+async def generate_fill_in_the_blank_questions(context,answer):
+    """Return ``context`` with every occurrence of ``answer`` replaced by underscores of the same length."""
+    blank = "_" * len(answer)
+    return context.replace(answer, blank)
+async def process_batch(batch, keywords, context_window_size, num_beams):
+    """Create question records for every keyword found in each segment of ``batch``.
+
+    For each keyword/context pair a question, its options and a
+    fill-in-the-blank variant are generated and the question is scored; only
+    records whose overall score is at least 0.5 are kept.
+    """
+    accepted = []
+    for segment in batch:
+        mapping = map_keywords_to_sentences(segment, keywords, context_window_size)
+        for keyword, context in mapping.items():
+            question = await generate_question_async(context, keyword, num_beams)
+            options = await generate_options_async(keyword, context)
+            blank_question = await generate_fill_in_the_blank_questions(context,keyword)
+            scores = assess_question_quality(context, question, keyword)
+            overall_score, relevance_score, complexity_score, spelling_correctness = scores
+            # Written as a negated >= so any score that fails the comparison is dropped.
+            if not overall_score >= 0.5:
+                continue
+            record = {
+                "question": question,
+                "context": context,
+                "answer": keyword,
+                "options": options,
+                "overall_score": overall_score,
+                "relevance_score": relevance_score,
+                "complexity_score": complexity_score,
+                "spelling_correctness": spelling_correctness,
+                "blank_question": blank_question,
+            }
+            accepted.append(record)
+    return accepted
+# Function to export questions to CSV
+def export_to_csv(data):
+    """Return ``data`` rendered as CSV text, one row per record and without the index column."""
+    return pd.DataFrame(data).to_csv(index=False)
+# Function to export questions to PDF
+def export_to_pdf(data):
+    """Render the question records into a one-column PDF.
+
+    Each record contributes its context, question, answer, comma-joined
+    options and overall score (two decimals), followed by a gap.
+
+    Returns:
+        The PDF output string encoded as latin-1 bytes.
+    """
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=12)
+    for item in data:
+        lines = (
+            f"Context: {item['context']}",
+            f"Question: {item['question']}",
+            f"Answer: {item['answer']}",
+            f"Options: {', '.join(item['options'])}",
+            f"Overall Score: {item['overall_score']:.2f}",
+        )
+        for line in lines:
+            pdf.multi_cell(0, 10, line)
+        pdf.ln(10)
+    rendered = pdf.output(dest='S')
+    return rendered.encode('latin-1')
+def display_word_cloud(generated_questions):
+    """Show a word cloud built from whitespace-separated word counts of the questions.
+
+    Args:
+        generated_questions: Iterable of question strings.
+    """
+    from collections import Counter
+    word_frequency = Counter(
+        word for question in generated_questions for word in question.split()
+    )
+    if not word_frequency:
+        # Nothing to draw; building a cloud from no frequencies would fail.
+        st.warning("No words available to build a word cloud.")
+        return
+    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_frequency)
+    # Draw on an explicit figure and hand it to Streamlit instead of relying on
+    # the global pyplot figure, then close it so figures do not accumulate.
+    fig, ax = plt.subplots(figsize=(10, 5))
+    ax.imshow(wordcloud, interpolation='bilinear')
+    ax.axis('off')
+    st.pyplot(fig)
+    plt.close(fig)
+def assess_question_quality(context, question, answer):
+    """Score a generated question against its context.
+
+    Args:
+        context: Text the question was generated from.
+        question: The generated question.
+        answer: Accepted for interface compatibility; not used in the score.
+
+    Returns:
+        ``(overall_score, relevance_score, complexity_score,
+        spelling_correctness)`` where the overall score is
+        ``0.4 * relevance + 0.4 * complexity + 0.2 * spelling``.
+    """
+    # Assess relevance using the spaCy similarity of the two documents
+    context_doc = nlp(context)
+    question_doc = nlp(question)
+    relevance_score = context_doc.similarity(question_doc)
+    # Assess complexity using token length (as a simple metric)
+    complexity_score = min(len(question_doc) / 20, 1)  # Normalize to 0-1
+    # Assess Spelling correctness
+    words = question.split()
+    if words:
+        misspelled = spell.unknown(words)
+        spelling_correctness = 1 - (len(misspelled) / len(words))  # Normalize to 0-1
+    else:
+        # An empty or whitespace-only question has no words to check; score it
+        # 0 instead of dividing by zero.
+        spelling_correctness = 0.0
+    # Calculate overall score (you can adjust weights as needed)
+    overall_score = (
+        0.4 * relevance_score +
+        0.4 * complexity_score +
+        0.2 * spelling_correctness
+    )
+    return overall_score, relevance_score, complexity_score, spelling_correctness
 def main():
+    # Streamlit interface
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
     state = initialize_state(session_id)
         st.session_state.feedback_data = []
     with st.sidebar:
+        show_info = st.toggle('Show Info',True)
         if show_info:
             display_info()
         st.subheader("Customization Options")
         # Customization options
         input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
         with st.expander("Choose the Additional Elements to show"):
+            show_context = st.checkbox("Context",True)
             show_answer = st.checkbox("Answer",True)
+            show_options = st.checkbox("Options",False)
             show_entity_link = st.checkbox("Entity Link For Wikipedia",True)
+            show_qa_scores = st.checkbox("QA Score",False)
             show_blank_question = st.checkbox("Fill in the Blank Questions",True)
         num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
         context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
         text = clean_text(text)
     with st.expander("Show text"):
         st.write(text)
     generate_questions_button = st.button("Generate Questions",help="This is the generate questions button")
     # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
+    # if generate_questions_button:
     if generate_questions_button and text:
         start_time = time.time()
         with st.spinner("Generating questions..."):
             try:
+                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords))
                 if not state['generated_questions']:
                     st.warning("No questions were generated. The text might be too short or lack suitable content.")
                 else:
         # Export buttons
         # if st.session_state.generated_questions:
         if state['generated_questions']:
+            with st.sidebar:
+                csv_data = export_to_csv(state['generated_questions'])
+                st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
+                pdf_data = export_to_pdf(state['generated_questions'])
+                st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
             with st.expander("View Visualizations"):
                 questions = [tpl['question'] for tpl in state['generated_questions']]
                 overall_scores = pd.DataFrame(overall_scores,columns=['Overall Scores'])
                 st.line_chart(overall_scores)
     # View Feedback Statistics
     with st.expander("View Feedback Statistics"):
         analyze_feedback()