realambuj commited on
Commit
1ffc5c5
1 Parent(s): 096390f

Upload 6 files

Browse files
Files changed (6) hide show
  1. Pic.png +0 -0
  2. app.py +101 -0
  3. deep-learning.png +0 -0
  4. prompts.py +88 -0
  5. requirements.txt +3 -0
  6. utils.py +48 -0
Pic.png ADDED
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader
2
+ from utils import *
3
+ import os
4
+ import google.generativeai as genai
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from dotenv import load_dotenv
7
+ import streamlit as st
8
# --- Page setup and model construction (module-level side effects) ---
st.set_page_config(layout="wide", page_title="QA Pair Generation from Documents",page_icon='deep-learning.png')

# Defaults; `temperature`/`numPairs`/`pages`/`option` are re-assigned by the
# widgets further down in the script on each Streamlit rerun.
temperature = 0.3
pages = []
numPairs = 2
option = ''
optionCategory = ("Long QA Pairs", "MCQs", "Short QA Pairs")

# Load GOOGLE_API_KEY from a local .env file and build the Gemini chat model.
# NOTE(review): the model is created with the default temperature here; the
# sidebar slider below re-assigns `temperature` but the model is never
# rebuilt with it — confirm this is intended.
load_dotenv()
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=temperature)
20
def _renderQAPairs(spinner_text, generator):
    """Generate question/answer pairs with *generator* and render each one
    as a Streamlit chat message.

    Reads the module-level ``pages``, ``numPairs`` and ``model`` globals.
    Extracted because LongQAPairs and ShortQAPairs were byte-for-byte
    duplicates apart from the spinner text and the generator function.
    """
    with st.spinner(spinner_text):
        # NOTE(review): the slice drops the last parsed page — confirm intentional.
        response = generator(pages[0:len(pages) - 1], numPairs, model)
    for qaPair in response:
        with st.chat_message("user"):
            st.write("Question : {}".format(qaPair['question']))
            st.write("Answer : {}".format(qaPair['answer']))


def LongQAPairs():
    """Render long-form QA pairs for the uploaded document."""
    _renderQAPairs('Generating Long Question Answer Pairs...', getLongQAPairs)


def ShortQAPairs():
    """Render short-answer QA pairs for the uploaded document."""
    _renderQAPairs('Generating Short Question Answer Pairs...', getShortQAPairs)


def McqQAPairs():
    """Render MCQ pairs as disabled radio groups with the correct option
    pre-selected via ``correct_option_index``."""
    with st.spinner('Generating MCQ Question Answer Pairs...'):
        # NOTE(review): the slice drops the last parsed page — confirm intentional.
        response = getMcqQAPairs(pages[0:len(pages) - 1], numPairs, model)
    for qaPair in response:
        with st.chat_message("user"):
            st.radio(label=qaPair['question'], options=qaPair["options"], disabled=True, index=qaPair['correct_option_index'])
49
+
50
+
51
# ---------------- Sidebar: branding, model info, temperature slider ----------------
with st.sidebar:
    st.image('Pic.png')
    st.title("Final Year Project")
    st.divider()
    with st.container(border=True):
        st.text('Model: Gemini Pro', help='Developed by Google \n')
        # NOTE(review): the chat model was already constructed above with the
        # default temperature; this slider value is never passed back to it.
        temperature = st.slider('Temperature:', 0.0, 1.0, 0.3, 0.1)

    code = '''Team Members CSE(20-37):
\nAmbuj Raj BT20CSE054 \nSrishti Pandey BT20CSE068 \nPrateek Niket BT20CSE211 \nSmriti Singh BT20CSE156'''
    st.code(code, language='JAVA')
    code = '''Mentored By: \nDr. Amol Bhopale'''
    st.code(code, language='JAVA')

st.title('Question Answer Pair Generation From Documents')

# ---------------- Main panel: upload (left column) and configuration (right) ----------------
with st.container(border=True):
    col1, col2 = st.columns(2)
    with col1:
        st.write("Please Upload Your File")
        uploaded_file = st.file_uploader("Choose a file", type='.pdf', accept_multiple_files=False)
        if uploaded_file is not None:
            # Persist the upload to disk because PyPDFLoader works on file paths.
            with open("temp.pdf", "wb") as f:
                f.write(uploaded_file.getbuffer())

            # Get the path of the uploaded file
            file_path = "temp.pdf"

            # Split the PDF into per-page langchain Documents (module global `pages`).
            pdf_loader = PyPDFLoader(file_path)
            pages = pdf_loader.load_and_split()
            print(len(pages))

    with col2:
        st.write('Please Choose your Configuration')
        option = st.selectbox(
            "In Which Category would you like to Generate Question Answer Pairs?",
            optionCategory,
            index=None,
            placeholder="Select Category of Question Answer Pairs",
        )
        numPairs = st.number_input('Number of QA Pairs', min_value=1, max_value=20, step=2,value=2)

# Dispatch on the chosen category; each branch re-checks that pages were
# parsed and that a valid category is selected.
if st.button("Generate", type="primary"):
    if option == "Long QA Pairs" and len(pages) and option in optionCategory:
        LongQAPairs()
    elif option == "MCQs" and len(pages) and option in optionCategory:
        McqQAPairs()
    elif option == "Short QA Pairs" and len(pages) and option in optionCategory:
        ShortQAPairs()
    # NOTE(review): `len(pages)` here is truthy when pages WERE parsed; a
    # document that yields zero pages with a valid category selected falls
    # through with no feedback — confirm whether `not len(pages)` was meant.
    elif len(pages) or option not in optionCategory or uploaded_file is None:
        st.error('Required Fields are Missing!', icon="🚨")
deep-learning.png ADDED
prompts.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # updated gemini prompts , for initial prompts use the ipynb file
2
+ # use Gemini-PRO Model
3
+
4
def getMcqQAPrompt():
    """Return the Gemini prompt template for MCQ generation.

    Template variables: ``context`` (source text) and ``numPairs`` (number of
    questions). The model is instructed to emit a bare JSON array of objects
    with ``question``, ``answer``, ``options`` and ``correct_option_index``.
    """
    # Fix: the JSON example previously ended with a trailing comma after
    # "correct_option_index", inviting the model to emit invalid JSON that
    # json.loads (in utils.util) would reject. The comma is removed.
    prompt = """
    Imagine leading a stimulating debate among renowned experts based on the following text:

    {context}

    To spark insightful discussion, design {numPairs} challenging multiple-choice questions, each with four plausible yet distinct options. Only one will be the accurate answer, revealed alongside the shuffled options as a bonus point for the sharpest minds!

    Craft your questions thoughtfully, employing a variety of types (factual recall, inferential reasoning, critical analysis) to test the depth and agility of the experts' understanding. Remember, the more nuanced and insightful your questions, the richer and more engaging the intellectual exchange will become.

    Then output only a json array that would describe each question and answer it will have in this format. Generate a valid json array.
    Please include each and every Question Answer Pair in the context.
    {{
        "question": <string>,
        "answer": <string>,
        "options" : [string],
        "correct_option_index" : <number>
    }}

    Never output the instructions given for output.
    Not include ```json in output , only give output as array.
    """
    return prompt
27
+
28
+
29
def getLongQAPrompt():
    """Return the Gemini prompt template for long-form QA pair generation.

    Template variables: ``context`` (source text) and ``numPairs``. The model
    is asked for a bare JSON array of ``{"question", "answer"}`` objects.
    """
    return """
    Carefully read and comprehend the following paragraph:

    {context}

    Now, create {numPairs} thought-provoking questions that delve into the key points, details, and implications of the paragraph. Provide concise and informative answers to each question, ensuring factual accuracy and clarity.

    Strive to generate a diverse range of question types (who, what, when, where, why, how) to explore various aspects of the text. Prioritize questions that encourage deeper understanding and critical thinking.

    Then output only a json array that would describe each question and answer it will have in this format. Generate a valid json array.
    Please include each and every Question Answer Pair in the context.
    {{
        "question": <string>,
        "answer": <string>
    }}

    Never output the instructions given for output.
    Not include ```json in output , only give output as array.
    """
50
+
51
+
52
def getShortQAPrompt():
    """Return the Gemini prompt template for short (one/two word) QA pairs.

    Template variables: ``context`` (source text) and ``numPairs``. The model
    is asked for a bare JSON array of ``{"question", "answer"}`` objects.
    """
    return """
    Carefully read and comprehend the following paragraph:

    {context}

    Now, craft {numPairs} intriguing questions that pierce through the heart of the paragraph, demanding concise answers. Aim for single-word or two-word responses that capture the essence.
    Diversify your question types (who, what, when, where, why, how) to illuminate various facets of the text. Prioritize questions that spark reflection and ignite critical thinking.

    Then output only a json array that would describe each question and answer it will have in this format. Generate a valid json array.
    Please include each and every Question Answer Pair in the context.
    {{
        "question": <string>,
        "answer": <string>
    }}

    Never output the instructions given for output.
    Not include ```json in output , only give output as array.
    Remember, brevity is key! One or two words should suffice to convey the point.
    """
73
+
74
+
75
def getRagChainPrompt():
    """Return the prompt template for the RAG answering chain.

    Template variables: ``context`` and ``question``. Instructs the model to
    answer only from the supplied context.
    """
    return """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "Answer is not available in the given Context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer: __answer__

    Always return response in JSON format
    Response should not contain ***
    """
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ langchain==0.1.4
+ langchain-google-genai==0.0.6
+ google-generativeai==0.3.2
+ python-dotenv==1.0.1
+ pypdf==4.0.1
+ streamlit==1.30.0
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.question_answering import load_qa_chain
2
+ from langchain.prompts import PromptTemplate
3
+ import json
4
+ from prompts import *
5
+
6
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ # from PyPDF2 import PdfReader
8
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
+ # from langchain.vectorstores import FAISS
10
+ # import PyPDF2
11
+
12
+
13
+ # utils function to generate QA Pairs
14
def util(context, numPairs, inputPrompt, model):
    """Run a 'stuff' QA chain over *context* and parse the model output as JSON.

    Args:
        context: list of langchain Documents fed to the chain.
        numPairs: number of QA pairs requested (substituted into the prompt).
        inputPrompt: PromptTemplate exposing ``context`` / ``numPairs``.
        model: chat model instance (Gemini via ChatGoogleGenerativeAI).

    Returns:
        The parsed JSON array of question/answer objects.

    Raises:
        json.JSONDecodeError: if the output is not valid JSON even after
        markdown fences are stripped.
    """
    stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=inputPrompt)
    stuff_answer = stuff_chain(
        {"input_documents": context, "numPairs": numPairs}, return_only_outputs=True
    )
    output_text = stuff_answer['output_text'].strip()
    # Robustness: despite the prompt's explicit instruction, LLMs frequently
    # wrap JSON in ```json fences; strip them so json.loads does not fail on
    # otherwise-valid output.
    if output_text.startswith("```"):
        output_text = output_text.removeprefix("```json").removeprefix("```")
        output_text = output_text.removesuffix("```").strip()
    output_json = json.loads(output_text)
    return output_json
22
+
23
+
24
+ ### Generating Q-A pairs Full Length QA Pairs
25
def _generateQAPairs(context, numPairs, model, prompt_template):
    """Shared helper: wrap *prompt_template* in a PromptTemplate and run util().

    Extracted because the three public generators below were identical except
    for which prompt template they used.
    """
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "numPairs"]
    )
    return util(context, numPairs, prompt, model)


### Generating Q-A pairs Full Length QA Pairs
def getLongQAPairs(context, numPairs, model):
    """Generate full-length question/answer pairs from *context*."""
    return _generateQAPairs(context, numPairs, model, getLongQAPrompt())


### Generating Q-A pairs - One Word Answer Type Pair
def getShortQAPairs(context, numPairs, model):
    """Generate short (one/two word answer) QA pairs from *context*."""
    return _generateQAPairs(context, numPairs, model, getShortQAPrompt())


### Generating Q-A pairs - MCQ
def getMcqQAPairs(context, numPairs, model):
    """Generate multiple-choice QA pairs (options plus correct_option_index)."""
    return _generateQAPairs(context, numPairs, model, getMcqQAPrompt())