Upload app5.py
Browse files
app5.py
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon Dec 25 18:18:27 2023
|
4 |
+
|
5 |
+
@author: alish
|
6 |
+
"""
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import fitz # PyMuPDF
|
10 |
+
import questiongenerator as qs
|
11 |
+
import random
|
12 |
+
|
13 |
+
from sentence_transformers import SentenceTransformer, util
|
14 |
+
from questiongenerator import QuestionGenerator
|
15 |
+
# Module-level question generator instance, created once at import time and
# shared by the helper functions below through the global name `qg`.
qg = QuestionGenerator()
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
def highlight_similar_sentence(text1, text2, color='yellow'):
    """Highlight the sentence of ``text2`` that best matches ``text1``.

    Both texts are split into sentences on '.', the sentences are embedded
    with the "paraphrase-MiniLM-L6-v2" sentence-transformers model, and the
    sentence of ``text2`` with the highest cosine similarity to any sentence
    of ``text1`` is wrapped in a coloured ``<span>`` by ``highlight_text``.

    Args:
        text1: Reference text whose sentences are matched against ``text2``.
        text2: Text to search; also the text that gets highlighted.
        color: CSS colour used for the highlight background.

    Returns:
        ``text2`` with the best-matching sentence highlighted, or ``text2``
        unchanged when either text has no sentences or no pair scores a
        similarity above 0.
    """
    sentences_text1 = [part.strip() for part in text1.split('.') if part.strip()]
    sentences_text2 = [part.strip() for part in text2.split('.') if part.strip()]

    # Nothing to compare: skip the (expensive) model entirely.
    if not sentences_text1 or not sentences_text2:
        return text2

    # Load the pre-trained model only once and keep it on the function object;
    # reloading it on every call is by far the slowest part of this function.
    model = getattr(highlight_similar_sentence, "_model", None)
    if model is None:
        model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
        highlight_similar_sentence._model = model

    # Encode each side once instead of re-encoding every text2 sentence for
    # every text1 sentence.
    embeddings_text1 = model.encode(sentences_text1, convert_to_tensor=True)
    embeddings_text2 = model.encode(sentences_text2, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(embeddings_text1, embeddings_text2)

    best_sentence = None
    max_similarity = 0.0
    # Row-major scan with a strict '>' keeps the first best match on ties.
    for row in similarities:
        for sentence_text2, score in zip(sentences_text2, row.tolist()):
            if score > max_similarity:
                max_similarity = score
                best_sentence = sentence_text2

    if best_sentence is None:
        return text2
    return highlight_text(text2, best_sentence, color=color)
|
51 |
+
|
52 |
+
|
53 |
+
def Extract_QA(qlist,selected_extracted_text):
    """Format generated multiple-choice questions and their answers.

    Args:
        qlist: Sequence of question dicts. Each dict is read as
            ``{'question': str, 'answer': [{'answer': str, 'correct': bool}, ...]}``
            with at least four answer options, one of them marked correct
            (assumed from how the entries are indexed here; confirm against
            the question generator's output).
        selected_extracted_text: Source text in which the sentence most
            similar to each correct answer gets highlighted.

    Returns:
        A tuple ``(questions_text, answers_html, highlighted_source_text)``.
    """
    option_labels = ['A', 'B', 'C', 'D']
    h_colors = ['yellow', 'red', 'DodgerBlue', 'Orange', 'Violet']
    question_parts = []
    answer_parts = []

    for i, entry in enumerate(qlist):
        question_i = entry['question']
        options = entry['answer'][:4]
        choices = [option['answer'] for option in options]
        is_correct = [option['correct'] for option in options]

        Q = (
            f"\nQ_{i+1}: {question_i}\n"
            f"A. {choices[0]}\n"
            f"B. {choices[1]}\n"
            f"C. {choices[2]}\n"
            f"D. {choices[3]}\n"
            "\n"
        )

        correct_labels = [label for label, ok in zip(option_labels, is_correct) if ok]
        correct_answer = [choice for choice, ok in zip(choices, is_correct) if ok]

        # The paragraph is now closed with </p>; it used to open a second <p>.
        A = f"\n<p>Answer_{i+1}: {correct_labels[0]} - {correct_answer[0]}</p>\n\n\n"

        # Cycle through the palette so more than five questions cannot
        # index past the end of the colour list.
        color = h_colors[i % len(h_colors)]

        A_sen = f""" The correct answer is {correct_answer[0]}."""

        A = highlight_text(input_text=A, selcted_text=correct_answer[0], color=color)
        selected_extracted_text = highlight_similar_sentence(
            A_sen, selected_extracted_text, color=color
        )

        question_parts.append(Q)
        answer_parts.append(A)

    return (''.join(question_parts), ''.join(answer_parts), selected_extracted_text)
|
95 |
+
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
+
def extract_text_from_pdf(pdf_file_path):
    """Read an uploaded PDF and store its text segments globally.

    The text of every page is joined with newlines, split into segments by
    ``get_sub_text`` and stored in the module-level ``extracted_text``.

    Args:
        pdf_file_path: Value passed straight to ``fitz.open``.

    Returns:
        A fixed status message for the UI.
    """
    global extracted_text
    with fitz.open(pdf_file_path) as pdf_doc:
        page_texts = [pdf_page.get_text() for pdf_page in pdf_doc]
    extracted_text = get_sub_text('\n'.join(page_texts))
    return ("File is uploaded Successfuly!")
|
114 |
+
# NOTE(review): `qg` is already bound to a QuestionGenerator() right after the
# imports; this line rebinds the same global name to a second instance.
# Presumably one of the two instantiations is redundant - confirm and drop it.
qg = qs.QuestionGenerator()
|
115 |
+
|
116 |
+
def get_sub_text(TXT):
    """Split ``TXT`` into segments with the question generator.

    Returns:
        The result of ``qg._split_into_segments(TXT)`` when it is a list,
        otherwise that result wrapped in a one-element list.
    """
    segments = qg._split_into_segments(TXT)
    return segments if isinstance(segments, list) else [segments]
|
122 |
+
|
123 |
+
def highlight_text(input_text, selcted_text, color='yellow'):
    """Wrap every occurrence of ``selcted_text`` in a highlighted ``<span>``.

    Args:
        input_text: Text (or HTML fragment) to search.
        selcted_text: Exact substring to highlight.
        color: CSS colour used as the span's background.

    Returns:
        ``input_text`` with each occurrence of ``selcted_text`` wrapped in
        ``<span style="background-color: ...">``. If ``selcted_text`` is
        empty, ``input_text`` is returned unchanged.
    """
    # str.replace with an empty pattern matches between every character and
    # would scatter empty spans through the whole text.
    if not selcted_text:
        return input_text
    return input_text.replace(
        selcted_text,
        f'<span style="background-color: {color}">{selcted_text}</span>',
    )
|
127 |
+
|
128 |
+
|
129 |
+
def pick_One_txt(sub_texts):
    """Pick one text segment at random and remember it globally.

    The chosen segment is stored in the module-level
    ``selected_extracted_text`` and also returned.

    Args:
        sub_texts: Non-empty sequence of text segments.

    Returns:
        The randomly selected segment.

    Raises:
        IndexError: If ``sub_texts`` is empty.
    """
    global selected_extracted_text
    if not sub_texts:
        raise IndexError("pick_One_txt() needs at least one text segment")
    # random.choice always yields a valid element, whereas
    # int(random.uniform(0, N)) can evaluate to N and index out of range.
    selected_extracted_text = random.choice(sub_texts)
    return selected_extracted_text
|
142 |
+
|
143 |
+
|
144 |
+
def pipeline(NoQs):
    """Generate questions and answers for the currently selected text.

    Reads the module-level ``selected_extracted_text``, asks ``qg`` for
    ``NoQs`` multiple-choice questions, formats them with ``Extract_QA`` and
    stores the results in the module-level ``Q`` and ``A``.

    Args:
        NoQs: Number of questions requested from the generator.

    Returns:
        A tuple ``(Q, A)`` where ``A`` is the answers followed by a newline
        and the highlighted source text.
    """
    global Q,A
    source_text = selected_extracted_text
    generated = qg.generate(source_text, num_questions=NoQs, answer_style="multiple_choice")
    Q, answers_html, marked_text = Extract_QA(generated, source_text)
    A = answers_html + '\n' + marked_text
    return (Q, A)
|
151 |
+
|
152 |
+
def ReurnAnswer():
    """Return the answers produced by the most recent ``pipeline`` run.

    Returns:
        The module-level ``A`` string, or a short HTML hint when no questions
        have been generated yet (so ``A`` does not exist).
    """
    # `A` only exists after pipeline() has run; reading it directly would
    # raise NameError when answers are requested before any questions.
    return globals().get("A", "<p>No answers yet. Click 'Show Questions' first.</p>")
|
154 |
+
|
155 |
+
def GetQuestion(NoQs):
    """Generate questions from a random segment of the uploaded PDF.

    Args:
        NoQs: Requested number of questions; converted with ``int``.

    Returns:
        The formatted questions text, or a short hint when no PDF text is
        available yet.
    """
    NoQs = int(NoQs)
    # `extracted_text` is only created by extract_text_from_pdf(); it is
    # missing before the first upload and may be empty for PDFs without text.
    segments = globals().get("extracted_text")
    if not segments:
        return "Please upload a PDF file with selectable text first."
    pick_One_txt(segments)
    questions_text, _answers = pipeline(NoQs)
    return questions_text
|
160 |
+
|
161 |
+
# Gradio UI: a narrow left column with the logo, description, upload button,
# action buttons and instructions, and a wider right column showing the
# generated questions and answers.
# NOTE(review): the nesting of the layout blocks below is reconstructed from
# statement order only - confirm it against the original file.
with gr.Blocks() as demo:
    # At module scope this `global` statement has no effect.
    global input_file

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                gr.Image("PupQuizAI.png")
                gr.Markdown(""" 🐶 **PupQuizAI** is an Artificial-Intelligence tool that streamlines the studying process. Simply input a text pdf that you need to study from. Then, PupQuiz will create 1-5 custom questions for you to study from each time you push 'Show Questions'.
""" )
            with gr.Row():
                input_file=gr.UploadButton(label='Select a file!', file_types=[".pdf"])
                #status = gr.Textbox(label="Status")
                status = gr.HTML( )
                # Extract the PDF text as soon as a file is uploaded and show
                # the returned status message.
                input_file.upload(fn=extract_text_from_pdf, inputs=input_file,outputs=status)

            #upload_btn = gr.Button(value="Upload the pdf File.")
            Gen_Question = gr.Button(value="Show Questions")
            Gen_Answer = gr.Button(value="Show Answers")
            No_Qs= gr.Slider(minimum=1, maximum=5,value=3, step=1, label='Max # of Questions')

            gr.Markdown(""" 🐶
**Instructions:**
* Start by selecting a 'pdf' text file you want to upload by clicking the "Select file" button. (PupQuiz currently only supports files that can have highlightable text)
* Select the number of questions you want generated from the "# of Questions" selector.
* Click "Show Questions"
* Then, if you want answers to the questions, select "Show Answers" """ )

            #gr.Image("PupQuizAI.png")

        # NOTE(review): `scale` is given as a float here but as an int for the
        # other column - confirm which type the installed Gradio version expects.
        with gr.Column(scale=2.0):
            #file_stat= gr.Textbox(label="File Status")
            question = gr.Textbox(label="Question(s)")
            #Answer = gr.Textbox(label="Answer(s)")
            Answer = gr.HTML(label="Answer(s)")

    # "Show Questions" generates a new question set; "Show Answers" displays
    # the answers stored by that same run.
    # NOTE(review): both listeners are registered under the same api_name
    # ("QuestioGenerator") - presumably unintended; confirm how Gradio
    # resolves the duplicate before relying on the API endpoint names.
    Gen_Question.click(GetQuestion, inputs=No_Qs, outputs=question, api_name="QuestioGenerator")
    Gen_Answer.click(ReurnAnswer, inputs=None, outputs=Answer, api_name="QuestioGenerator")


# Start the web server as soon as the module is executed.
demo.launch()
|