# File-viewer metadata (not part of the program): file size 7,162 Bytes, 8a51490
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 25 18:18:27 2023
@author: alish
"""
import gradio as gr
import fitz # PyMuPDF
import questiongenerator as qs
import random
from sentence_transformers import SentenceTransformer, util
from questiongenerator import QuestionGenerator
# Module-level QuestionGenerator instance created at import time;
# get_sub_text() and pipeline() use the module global `qg`.
qg = QuestionGenerator()
# Lazily created SentenceTransformer shared by every call, so the model is
# loaded once instead of once per highlighted answer.
_SENTENCE_MODEL = None


def _get_sentence_model():
    """Return the shared sentence-embedding model, loading it on first use."""
    global _SENTENCE_MODEL
    if _SENTENCE_MODEL is None:
        _SENTENCE_MODEL = SentenceTransformer("paraphrase-MiniLM-L6-v2")
    return _SENTENCE_MODEL


def highlight_similar_sentence(text1, text2, color='yellow'):
    """Highlight the sentence of ``text2`` most similar to any sentence of ``text1``.

    Both texts are split on ``'.'`` into stripped, non-empty sentences. Every
    sentence pair is scored with the cosine similarity of its embeddings, and
    the ``text2`` sentence belonging to the highest-scoring pair is wrapped in
    a coloured ``<span>`` by ``highlight_text`` (which marks every occurrence
    of that sentence in ``text2``).

    Args:
        text1: Text whose sentences are used as queries.
        text2: Text in which the best matching sentence is highlighted.
        color: CSS colour passed through to ``highlight_text``.

    Returns:
        ``text2`` with the best matching sentence highlighted, or ``text2``
        unchanged when either text has no sentences or no pair scores above 0.
    """
    sentences_text1 = [part.strip() for part in text1.split('.') if part.strip()]
    sentences_text2 = [part.strip() for part in text2.split('.') if part.strip()]
    if not sentences_text1 or not sentences_text2:
        # Nothing to compare: skip loading the model altogether.
        return text2

    model = _get_sentence_model()
    # The text2 embeddings do not depend on the text1 sentence being compared,
    # so compute them once rather than once per outer-loop iteration.
    embeddings_text2 = [
        model.encode(sentence, convert_to_tensor=True)
        for sentence in sentences_text2
    ]

    best_sentence = None
    max_similarity = 0.0
    for sentence_text1 in sentences_text1:
        embedding_text1 = model.encode(sentence_text1, convert_to_tensor=True)
        for sentence_text2, embedding_text2 in zip(sentences_text2, embeddings_text2):
            similarity = util.pytorch_cos_sim(embedding_text1, embedding_text2).item()
            # Strict comparison keeps the first pair that reaches the maximum.
            if similarity > max_similarity:
                max_similarity = similarity
                best_sentence = sentence_text2

    if best_sentence is None:
        return text2
    return highlight_text(text2, best_sentence, color=color)
def Extract_QA(qlist, selected_extracted_text):
    """Format generated multiple-choice questions and their answers.

    Each entry of ``qlist`` is read as a mapping with a ``'question'`` value
    and an ``'answer'`` sequence whose first four items are mappings holding
    the choice text under ``'answer'`` and a truthy/falsy ``'correct'`` flag.

    For every question the correct choice is highlighted in the answer markup
    and the sentence of ``selected_extracted_text`` most similar to
    "The correct answer is <choice>." is highlighted in the same colour.

    Args:
        qlist: Sequence of question entries as described above.
        selected_extracted_text: Source text the questions were generated from.

    Returns:
        A ``(questions, answers, highlighted_text)`` tuple: the plain-text
        question list, the HTML answer list, and the source text with the
        supporting sentences highlighted.

    Raises:
        IndexError: If an entry has fewer than four choices or no choice is
            flagged as correct.
    """
    xs = ['A', 'B', 'C', 'D']
    h_colors = ['yellow', 'red', 'DodgerBlue', 'Orange', 'Violet']
    question_parts = []
    answer_parts = []
    for i, entry in enumerate(qlist):
        question_i = entry['question']
        options = [entry['answer'][j] for j in range(4)]
        Choices_ans = [option['answer'] for option in options]
        Choice_is_correct = [option['correct'] for option in options]
        Q = f"""
Q_{i+1}: {question_i}
A. {Choices_ans[0]}
B. {Choices_ans[1]}
C. {Choices_ans[2]}
D. {Choices_ans[3]}
"""
        result = [x for x, y in zip(xs, Choice_is_correct) if y]
        correct_answer = [x for x, y in zip(Choices_ans, Choice_is_correct) if y]
        # Cycle through the palette so more questions than colours cannot
        # raise an IndexError.
        color = h_colors[i % len(h_colors)]
        # Highlight the answer text on its own: running the replacement over
        # the whole markup would also rewrite the label or the tags whenever
        # the answer text happens to occur there (e.g. "A", "1", "p").
        highlighted_answer = highlight_text(
            input_text=correct_answer[0],
            selcted_text=correct_answer[0],
            color=color,
        )
        A = f"""
<p>Answer_{i+1}: {result[0]} - {highlighted_answer}</p>
"""
        A_sen = f""" The correct answer is {correct_answer[0]}."""
        selected_extracted_text = highlight_similar_sentence(
            A_sen, selected_extracted_text, color=color
        )
        question_parts.append(Q)
        answer_parts.append(A)
    return (''.join(question_parts), ''.join(answer_parts), selected_extracted_text)
def extract_text_from_pdf(pdf_file_path):
    """Read a PDF and store its segmented text in the global ``extracted_text``.

    The text of every page is collected with PyMuPDF, joined with newlines and
    passed to ``get_sub_text``; the resulting list of segments replaces the
    module-level ``extracted_text``.

    Args:
        pdf_file_path: Value handed to ``fitz.open``; it is also rendered with
            ``str()`` in the returned message.

    Returns:
        A status string naming the uploaded file.
    """
    global extracted_text
    with fitz.open(pdf_file_path) as document:
        page_texts = [page.get_text() for page in document]
    extracted_text = '\n'.join(page_texts)
    extracted_text = get_sub_text(extracted_text)
    status = "The pdf is uploaded Successfully from:" + str(pdf_file_path)
    return status
# `qg` is already constructed next to the imports from the same
# QuestionGenerator class; constructing it a second time here only repeated
# that work, so the existing module-level instance is reused.
def get_sub_text(TXT):
    """Split ``TXT`` into segments using the shared question generator.

    Args:
        TXT: The full text to split.

    Returns:
        A list of segments. A non-list result from
        ``qg._split_into_segments`` is wrapped in a one-element list so
        callers can always index into the result.
    """
    sub_texts = qg._split_into_segments(TXT)
    return sub_texts if isinstance(sub_texts, list) else [sub_texts]
def highlight_text(input_text, selcted_text, color='yellow'):
    """Wrap every occurrence of ``selcted_text`` in a coloured ``<span>``.

    Args:
        input_text: Text (or HTML) to search.
        selcted_text: Exact substring to highlight.
        color: CSS colour used for the span's ``background-color``.

    Returns:
        ``input_text`` with each occurrence of ``selcted_text`` wrapped in
        ``<span style="background-color: ...">``. The input is returned
        unchanged when ``selcted_text`` is empty or does not occur.
    """
    if not selcted_text:
        # str.replace("", x) would insert a span between every character.
        return input_text
    span = f'<span style="background-color: {color}">{selcted_text}</span>'
    return input_text.replace(selcted_text, span)
def pick_One_txt(sub_texts):
    """Pick one segment at random and remember it as the active text.

    The chosen segment is stored in the module-level
    ``selected_extracted_text`` and also returned.

    Args:
        sub_texts: Non-empty sequence of text segments.

    Returns:
        The randomly selected segment.

    Raises:
        IndexError: If ``sub_texts`` is empty.
    """
    global selected_extracted_text
    # random.choice always yields a valid index; int(random.uniform(0, N))
    # could produce N because the upper end-point of uniform() may be included.
    selected_extracted_text = random.choice(sub_texts)
    return selected_extracted_text
def pipeline(NoQs):
    """Generate questions for the currently selected text segment.

    Reads the module-level ``selected_extracted_text``, asks the shared
    question generator for ``NoQs`` multiple-choice questions and formats them
    with ``Extract_QA``. The results are stored in the globals ``Q`` (question
    text) and ``A`` (answer markup followed by a newline and the highlighted
    source text).

    Args:
        NoQs: Number of questions requested from the generator.

    Returns:
        The ``(Q, A)`` tuple that was stored in the globals.
    """
    global Q, A
    source_text = selected_extracted_text
    generated = qg.generate(
        source_text,
        num_questions=NoQs,
        answer_style="multiple_choice",
    )
    Q, answers_markup, highlighted_source = Extract_QA(generated, source_text)
    A = answers_markup + '\n' + highlighted_source
    return (Q, A)
def ReurnAnswer():
    """Return the answer markup produced by the most recent ``pipeline`` run.

    Returns:
        The module-level ``A`` string.

    Raises:
        gr.Error: If no questions have been generated yet, so ``A`` does not
            exist; this replaces the bare NameError with a readable message.
    """
    try:
        return A
    except NameError:
        raise gr.Error(
            "No answers available yet - click 'Show Questions' first."
        ) from None
def GetQuestion(NoQs):
    """Pick a random text segment and return freshly generated questions.

    Args:
        NoQs: Requested number of questions; converted with ``int()``.

    Returns:
        The question text produced by ``pipeline`` for the chosen segment.

    Raises:
        gr.Error: If no PDF has been uploaded yet, or the uploaded PDF yielded
            no text segments.
    """
    NoQs = int(NoQs)
    try:
        segments = extracted_text
    except NameError:
        # The global is only created by extract_text_from_pdf.
        raise gr.Error(
            "Please upload a PDF file before generating questions."
        ) from None
    if not segments:
        raise gr.Error("No text could be extracted from the uploaded PDF.")
    pick_One_txt(segments)
    questions, _answers = pipeline(NoQs)
    return questions
# Gradio user interface: layout containers, input widgets, output widgets and
# the click/upload event wiring.
# NOTE(review): the leading indentation of this section is missing in this copy
# of the file, so the nesting of the Row/Column containers below cannot be read
# off the code - restore it from the original before running.
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=1):
with gr.Row():
gr.Image("PupQuizAI.png")
gr.Markdown(""" 🐶 **PupQuizAI** is an Artificial-Intelligence tool that streamlines the studying process. Simply input a text pdf that you need to study from. Then, PupQuiz will create 1-5 custom questions for you to study from each time you push 'Show Questions'.
""" )
# Upload button restricted to .pdf files. On upload it calls
# extract_text_from_pdf with the uploaded file; no outputs are wired, so the
# status string that function returns is not displayed anywhere.
input_file=gr.UploadButton(label='Select a file!', file_types=[".pdf"])
input_file.upload(extract_text_from_pdf, input_file)
#upload_btn = gr.Button(value="Upload the pdf File.")
Gen_Question = gr.Button(value="Show Questions")
Gen_Answer = gr.Button(value="Show Answers")
# Slider value is passed to GetQuestion, which converts it with int().
No_Qs= gr.Slider(minimum=1, maximum=5,value=3, step=1, label='Max # of Questions')
gr.Markdown(""" 🐶
**Instructions:**
* Start by selecting a 'pdf' text file you want to upload by clicking the "Select file" button. (PupQuiz currently only supports files that can have highlightable text)
* Select the number of questions you want generated from the "# of Questions" selector.
* Click "Show Questions"
* Then, if you want answers to the questions, select "Show Answers" """ )
#gr.Image("PupQuizAI.png")
# NOTE(review): scale is given as a float (2.0) here but as an int (1) for the
# other column - confirm the installed Gradio version accepts a float.
with gr.Column(scale=2.0):
#file_stat= gr.Textbox(label="File Status")
question = gr.Textbox(label="Question(s)")
#Answer = gr.Textbox(label="Answer(s)")
# HTML component because the answer string contains <p>/<span> markup.
Answer = gr.HTML(label="Answer(s)")
# "Show Questions" feeds the slider value to GetQuestion and shows the result
# in the question textbox; "Show Answers" shows ReurnAnswer()'s markup.
# NOTE(review): both handlers register the same api_name ("QuestioGenerator") -
# confirm how the installed Gradio version resolves the duplicate.
Gen_Question.click(GetQuestion, inputs=No_Qs, outputs=question, api_name="QuestioGenerator")
Gen_Answer.click(ReurnAnswer, inputs=None, outputs=Answer, api_name="QuestioGenerator")
# Start the Gradio app (the stray trailing "|" was extraction residue and a
# syntax error).
demo.launch()