File size: 7,170 Bytes
8a51490
 
 
 
4ac434a
8a51490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 25 18:18:27 2023

@author: danielshirazi
"""

import gradio as gr
import fitz  # PyMuPDF
import questiongenerator as qs
import random

from sentence_transformers import SentenceTransformer, util
from questiongenerator import QuestionGenerator
qg = QuestionGenerator()



_SENTENCE_MODEL_NAME = "paraphrase-MiniLM-L6-v2"
_sentence_model = None


def _get_sentence_model():
    """Return the shared sentence-embedding model, loading it on first use."""
    global _sentence_model
    if _sentence_model is None:
        _sentence_model = SentenceTransformer(_SENTENCE_MODEL_NAME)
    return _sentence_model


def highlight_similar_sentence(text1, text2, color='yellow'):
    """Highlight the sentence of ``text2`` that is most similar to ``text1``.

    Both texts are split on '.' into stripped, non-empty sentences. Every
    sentence of ``text1`` is compared with every sentence of ``text2`` using
    cosine similarity of their sentence embeddings, and the single best
    matching sentence of ``text2`` is wrapped in a coloured ``<span>`` via
    ``highlight_text``.

    Args:
        text1: Reference text whose sentences are searched for.
        text2: Text in which the best-matching sentence is highlighted.
        color: CSS colour used for the highlight background.

    Returns:
        ``text2`` with the best-matching sentence highlighted, or ``text2``
        unchanged when either text has no sentences or no pair has a
        similarity above zero.
    """
    sentences_text1 = [sentence.strip() for sentence in text1.split('.') if sentence.strip()]
    sentences_text2 = [sentence.strip() for sentence in text2.split('.') if sentence.strip()]

    # Nothing to compare: skip the (expensive) model entirely.
    if not sentences_text1 or not sentences_text2:
        return text2

    model = _get_sentence_model()

    # The embeddings of text2 do not depend on the text1 sentence being
    # examined, so compute them once instead of once per outer iteration.
    embeddings_text2 = [
        model.encode(sentence_text2, convert_to_tensor=True)
        for sentence_text2 in sentences_text2
    ]

    best_sentence = None
    max_similarity = 0.0
    for sentence_text1 in sentences_text1:
        embedding_text1 = model.encode(sentence_text1, convert_to_tensor=True)
        for sentence_text2, embedding_text2 in zip(sentences_text2, embeddings_text2):
            similarity = util.pytorch_cos_sim(embedding_text1, embedding_text2).item()
            # Strict comparison keeps the first sentence reaching the maximum.
            if similarity > max_similarity:
                max_similarity = similarity
                best_sentence = sentence_text2

    if best_sentence is None:
        return text2
    # Build the highlighted string once, for the final winner only.
    return highlight_text(text2, best_sentence, color=color)


def Extract_QA(qlist,selected_extracted_text):
    """Format generated multiple-choice questions, answers and source text.

    Args:
        qlist: Sequence of question entries. Each entry is indexed with
            ``'question'`` and ``'answer'``; ``'answer'`` must hold at least
            four items, each indexed with ``'answer'`` (the choice text) and
            ``'correct'`` (truthy for the right choice).
        selected_extracted_text: Source text in which, for every question,
            the sentence most similar to the correct answer is highlighted.

    Returns:
        A tuple ``(questions, answers, highlighted_text)``: the plain-text
        question sheet, the HTML answer sheet, and the source text with the
        per-question highlights applied.

    Raises:
        IndexError: If a question has fewer than four choices or none of
            them is flagged as correct.
    """
    choice_labels = ['A', 'B', 'C', 'D']
    h_colors = ['yellow', 'red', 'DodgerBlue', 'Orange', 'Violet']
    question_blocks = []
    answer_blocks = []

    for i, entry in enumerate(qlist):
        question_i = entry['question']
        options = [entry['answer'][j] for j in range(len(choice_labels))]
        Choices_ans = [option['answer'] for option in options]
        Choice_is_correct = [option['correct'] for option in options]

        question_blocks.append(f"""
                 Q_{i+1}: {question_i}
                 A. {Choices_ans[0]}
                 B. {Choices_ans[1]}
                 C. {Choices_ans[2]}
                 D. {Choices_ans[3]} 
                                  
                """)

        correct_labels = [label for label, ok in zip(choice_labels, Choice_is_correct) if ok]
        correct_answer = [text for text, ok in zip(Choices_ans, Choice_is_correct) if ok]

        # Cycle through the palette so more questions than colours cannot
        # run off the end of the list.
        color = h_colors[i % len(h_colors)]

        # Highlight the answer text on its own before embedding it, so an
        # answer that also occurs in the label or markup cannot corrupt them.
        highlighted_answer = highlight_text(
            input_text=correct_answer[0], selcted_text=correct_answer[0], color=color
        )
        answer_blocks.append(f"""
                <p>Answer_{i+1}: {correct_labels[0]} - {highlighted_answer}</p>
                
                
                """)

        A_sen = f""" The correct answer is {correct_answer[0]}."""
        selected_extracted_text = highlight_similar_sentence(
            A_sen, selected_extracted_text, color=color
        )

    return (''.join(question_blocks), ''.join(answer_blocks), selected_extracted_text)






def extract_text_from_pdf(pdf_file_path):
    """Read a PDF and store its segmented text in the global ``extracted_text``.

    The text of every page is collected with PyMuPDF, joined with newlines
    and passed through ``get_sub_text``; the result replaces the module-level
    ``extracted_text``.

    Args:
        pdf_file_path: Value handed to ``fitz.open`` to open the document.

    Returns:
        A status message that includes ``str(pdf_file_path)``.
    """
    global extracted_text
    with fitz.open(pdf_file_path) as document:
        page_texts = [page.get_text() for page in document]
    extracted_text = '\n'.join(page_texts)
    extracted_text = get_sub_text(extracted_text)
    return f"The pdf is uploaded Successfully from:{pdf_file_path!s}"

# The shared QuestionGenerator instance `qg` is already created right after
# the imports; instantiating it a second time here only reloaded the model.

def get_sub_text(TXT):
    """Split ``TXT`` into segments using the question generator.

    Args:
        TXT: Text handed to ``qg._split_into_segments``.

    Returns:
        The segments as a list; a non-list result is wrapped in a
        single-element list.
    """
    segments = qg._split_into_segments(TXT)
    return segments if isinstance(segments, list) else [segments]

def highlight_text(input_text, selcted_text, color='yellow'):
    """Wrap every occurrence of ``selcted_text`` in a highlighted ``<span>``.

    Args:
        input_text: Text to search.
        selcted_text: Exact substring to highlight.
        color: CSS colour used for the span's background.

    Returns:
        ``input_text`` with each occurrence of ``selcted_text`` wrapped in
        ``<span style="background-color: ...">``. When ``selcted_text`` is
        empty the text is returned unchanged.
    """
    # str.replace with an empty pattern matches between every character and
    # would scatter empty spans through the whole text.
    if not selcted_text:
        return input_text
    span = f'<span style="background-color: {color}">{selcted_text}</span>'
    return input_text.replace(selcted_text, span)


def pick_One_txt(sub_texts):
    """Pick one text segment at random and remember it globally.

    The chosen segment is stored in the module-level
    ``selected_extracted_text`` and also returned.

    Args:
        sub_texts: Non-empty sequence of text segments.

    Returns:
        The randomly selected segment.

    Raises:
        IndexError: If ``sub_texts`` is empty.
    """
    global selected_extracted_text
    # random.choice always yields a valid element, whereas truncating
    # random.uniform(0, N) can produce N because uniform may return its
    # upper endpoint.
    selected_extracted_text = random.choice(sub_texts)
    return selected_extracted_text
 

def pipeline(NoQs):
    """Generate questions for the currently selected text segment.

    Reads the global ``selected_extracted_text``, asks the question
    generator for multiple-choice questions and formats them with
    ``Extract_QA``. The results are stored in the globals ``Q`` and ``A``;
    ``A`` additionally carries the highlighted source text after a newline.

    Args:
        NoQs: Number of questions requested from the generator.

    Returns:
        The tuple ``(Q, A)``.
    """
    global Q, A
    source_text = selected_extracted_text
    generated = qg.generate(
        source_text,
        num_questions=NoQs,
        answer_style="multiple_choice",
    )
    Q, A, marked_source = Extract_QA(generated, source_text)
    A = '\n'.join((A, marked_source))
    return Q, A

def ReurnAnswer():
    """Return the answer sheet stored in the global ``A``.

    Returns:
        The current value of the module-level ``A``, or an empty string when
        no answers have been stored yet, so that asking for answers before
        any questions were generated does not raise ``NameError``.
    """
    return globals().get("A", "")

def GetQuestion(NoQs):
    """Select a random text segment and return questions generated for it.

    Args:
        NoQs: Requested number of questions; converted with ``int``.

    Returns:
        The question sheet produced by ``pipeline``. The answers are not
        returned here; ``pipeline`` keeps them in its global ``A``.
    """
    question_count = int(NoQs)
    pick_One_txt(extracted_text)
    questions, _answers = pipeline(question_count)
    return questions

# Gradio interface: logo, controls and instructions in the left column,
# generated questions and answers in the (wider) right column.
with gr.Blocks() as demo:

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                gr.Image("PupQuizAI.png")
                gr.Markdown(""" 🐶 **PupQuizAI** is an Artificial-Intelligence tool that streamlines the studying process. Simply input a text pdf that you need to study from. Then, PupQuiz will create 1-5 custom questions for you to study from each time you push 'Show Questions'. 
                             """ )

            # Uploading a PDF immediately extracts and segments its text.
            input_file=gr.UploadButton(label='Select a file!', file_types=[".pdf"])
            input_file.upload(extract_text_from_pdf, input_file)
            Gen_Question = gr.Button(value="Show Questions")
            Gen_Answer = gr.Button(value="Show Answers")
            No_Qs= gr.Slider(minimum=1, maximum=5,value=3, step=1, label='Max # of Questions')

            gr.Markdown(""" 🐶 
                        **Instructions:**                     
                        * Start by selecting a 'pdf' text file you want to upload by clicking the "Select file" button. (PupQuiz currently only supports files that can have highlightable text)
                        * Select the number of questions you want generated from the "# of Questions" selector. 
                        * Click "Show Questions" 
                        * Then, if you want answers to the questions, select "Show Answers" """  )

        # `scale` is documented as an integer, so pass 2 rather than 2.0.
        with gr.Column(scale=2):
            question = gr.Textbox(label="Question(s)")
            # Answers are rendered as HTML because they contain highlight spans.
            Answer = gr.HTML(label="Answer(s)")

    # Each event gets its own API name; reusing the question endpoint's name
    # for the answers made Gradio publish it under an auto-suffixed name.
    Gen_Question.click(GetQuestion, inputs=No_Qs, outputs=question, api_name="QuestioGenerator")
    Gen_Answer.click(ReurnAnswer, inputs=None, outputs=Answer, api_name="AnswerGenerator")


demo.launch()