Rehman1603 committed on
Commit
7ff0a58
1 Parent(s): be67fc8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from langchain import PromptTemplate
5
+ from langchain import LLMChain
6
+ from langchain_together import Together
7
+ import re
8
+ from docx import Document
9
+ import os
10
+
11
# Together API credentials.
# SECURITY: the key must be supplied through the environment (for example a
# Space secret or a local shell export) and never committed to source control.
# A key that has ever appeared in this file's history should be revoked.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it as an environment variable "
        "(or configure it as a secret) before starting the app."
    )

# Checkpoint used for the summarisation tokenizer/model pair below.
checkpoint = "sshleifer/distilbart-cnn-12-6"
# LLM client used to write the quiz questions from the summary.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
17
def Summary_BART(text):
    """Summarise *text* with the module-level tokenizer and seq2seq model.

    The input is tokenised with truncation at 1024 tokens, so content past
    that limit does not reach the model. Returns the first decoded summary
    as a string.
    """
    encoded = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]
22
+
23
# Patterns for pulling questions out of the LLM reply. The leading "N." is
# optional because the prompt's format line does not ask for numbering, and an
# item may end either at a newline or at the very end of the reply (so the
# last question is not lost when the reply has no trailing newline).
_MCQ_PATTERN = re.compile(
    r'(?:\d+\.\s*)?Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)',
    re.DOTALL,
)
_SHORT_QUESTION_PATTERN = re.compile(
    r'(?:\d+\.\s*)?SQ:\s*(.*?)(?:\n|$)',
    re.DOTALL,
)


def _extract_pdf_text(file):
    """Return the concatenated text of every page of the PDF *file*."""
    reader = PdfReader(file)
    # Defensive: a page with no extractable text must not break the join.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_quiz_response(response_text):
    """Split the LLM reply into MCQ tuples and short-question strings."""
    mcqs = _MCQ_PATTERN.findall(response_text)
    short_questions = _SHORT_QUESTION_PATTERN.findall(response_text)
    return mcqs, short_questions


def _write_quiz_document(mcqs, short_questions, title, output_path):
    """Write the parsed questions to a Word document saved at *output_path*."""
    doc = Document()
    doc.add_heading(title, level=1)

    # Section 1: MCQs with their options.
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A and
        # no separate answer key is produced; kept as-is to preserve output.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Remaining options are lettered from "B" onwards (chr(66) == "B").
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")

    # Section 2: short questions on a fresh page.
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for idx, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{idx}. {question.strip()}", style="Body Text")

    doc.save(output_path)


def DocToQuizz(file, difficulty_level, *, title="Physics Questions",
               output_path="Physics_Questions.docx"):
    """Generate a quiz Word document from a PDF.

    The PDF text is extracted, summarised with ``Summary_BART``, and the
    summary is sent to the ``llama3`` LLM with a prompt asking for 10 MCQs and
    10 short questions. The reply is parsed with regular expressions and the
    questions are written to a .docx file.

    Args:
        file: Path or file object accepted by ``PdfReader``.
        difficulty_level: Difficulty label inserted into the prompt.
        title: Top-level heading of the generated document.
        output_path: Where the .docx file is saved.

    Returns:
        ``output_path``, the location of the saved document.
    """
    # Read the PDF content and condense it before prompting the LLM.
    text = _extract_pdf_text(file)
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
    Generate 20 different questions based on the following summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}

    For the multiple-choice questions (MCQs), please provide the following for each question:
    1. Question
    - Use varied question formats such as:
    - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
    - Ensure questions are logically phrased and relevant to the content.
    2. Correct answer
    3. Three plausible incorrect answer options
    4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"

    For the short questions, please provide:
    1. Question
    - Use varied question formats to encourage conceptual understanding and avoid repetition.
    - Ensure the short questions do not overlap in content with the MCQs.
    2. Short, concise answer
    3. Format: "SQ: <question text>\nAnswer: <answer>"

    Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )

    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)

    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })

    response_text = response['text']

    # Extract MCQs and short questions, then write them out.
    mcqs, short_questions = _parse_quiz_response(response_text)
    _write_quiz_document(mcqs, short_questions, title, output_path)
    return output_path
100
+
101
# Fixed list of PDF names offered in the UI: output_range_1.pdf .. output_range_9.pdf
pdf_files = [f"output_range_{number}.pdf" for number in range(1, 10)]
# Difficulty labels offered in the UI.
difficulty_levels = ["Easy", "Medium", "Hard"]
104
+
105
+ # Gradio Interface
106
def generate_quiz(file, difficulty_level):
    """Gradio callback: delegate to ``DocToQuizz`` and return its result."""
    return DocToQuizz(file, difficulty_level)
109
+
110
# Build the UI: two dropdowns in, one downloadable file out.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        # The default PDF belongs on the file picker; it was previously set on
        # the difficulty picker, where it is not one of the valid choices.
        gr.Dropdown(pdf_files, label="Select PDF File", value=pdf_files[0]),
        # Give the difficulty picker a valid default so the callback never
        # receives an empty selection.
        gr.Dropdown(difficulty_levels, label="Select Difficulty Level", value=difficulty_levels[0]),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions."
)

# Launch the interface
interface.launch(debug=True)