File size: 5,939 Bytes
fdcd573
 
 
 
 
 
 
 
911ed13
fdcd573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import gradio as gr
import os
import fitz  # PyMuPDF for PDF extraction
from pptx import Presentation
from docx import Document
from groq import Groq

# Initialize Groq Client
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret lives in the source file.
# NOTE(review): a literal key was previously committed on this line; it should
# be treated as leaked and revoked.
# NOTE(review): with the variable unset, api_key is None and the Groq client is
# expected to raise at construction time — confirm against the SDK docs.
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

# File Extraction Functions
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        file_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The text of all pages joined in page order. If opening or reading
        the document raises, a string starting with ``"Error reading PDF:"``
        is returned instead of propagating the exception.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load; previously the handle was never released.
        with fitz.open(file_path) as pdf_file:
            return "".join(
                pdf_file.load_page(page_num).get_text()
                for page_num in range(pdf_file.page_count)
            )
    except Exception as e:
        return f"Error reading PDF: {e}"

def extract_text_from_ppt(file_path):
    """Gather the text of every shape exposing a ``text`` attribute.

    Slides are visited in order and, within a slide, shapes in order; each
    shape's text is followed by a newline.

    Args:
        file_path: Path of the presentation, passed to ``Presentation``.

    Returns:
        The collected text, or a string starting with ``"Error reading PPT:"``
        if loading or traversing the presentation raises.
    """
    try:
        deck = Presentation(file_path)
        chunks = [
            shape.text + "\n"
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, 'text')
        ]
    except Exception as e:
        return f"Error reading PPT: {e}"
    return "".join(chunks)

def extract_text_from_word(file_path):
    """Return the text of every paragraph in a Word document.

    Each paragraph's text is followed by a newline, in document order.

    Args:
        file_path: Path of the document, passed to ``Document``.

    Returns:
        The collected text, or a string starting with
        ``"Error reading Word file:"`` if loading or reading raises.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        lines = [paragraph.text + "\n" for paragraph in paragraphs]
    except Exception as e:
        return f"Error reading Word file: {e}"
    return "".join(lines)

def process_files(file_paths):
    """Extract and concatenate the text of several uploaded files.

    Files are dispatched on their extension to the matching extractor
    (``.pdf``, ``.pptx``, ``.docx``). The extension check is
    case-insensitive, so ``REPORT.PDF`` is handled like ``report.pdf``.

    Args:
        file_paths: Iterable of file path strings; ``None`` or an empty
            iterable yields an empty result.

    Returns:
        The extracted text of all files in input order. Files with any other
        extension contribute an ``"Unsupported file format: <path>"`` line.
    """
    parts = []
    for file_path in file_paths or []:
        # Match on a lowercased copy only; the original path is still what
        # gets opened and what appears in the "unsupported" message.
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            parts.append(extract_text_from_pdf(file_path))
        elif lowered.endswith(".pptx"):
            parts.append(extract_text_from_ppt(file_path))
        elif lowered.endswith(".docx"):
            parts.append(extract_text_from_word(file_path))
        else:
            parts.append(f"Unsupported file format: {file_path}\n")
    return "".join(parts)

# Generate MCQs and Subjective Questions Using Groq
def _split_question_sections(response):
    """Split a model reply into (mcq_text, subjective_text) at the subjective header."""
    mcq_lines, subjective_lines = [], []
    in_subjective = False
    for line in response.split("\n"):
        if not in_subjective:
            # Accept the exact Markdown header anywhere in the line, and also
            # a header line written with different case or decoration
            # ("## Subjective Questions", "Subjective questions:"), because
            # the model does not always reproduce the header verbatim.
            heading = line.strip().strip("*#: ").lower()
            if "**Subjective Questions**" in line or heading.startswith("subjective question"):
                in_subjective = True
        target = subjective_lines if in_subjective else mcq_lines
        target.append(line + "\n")
    return "".join(mcq_lines), "".join(subjective_lines)


def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium", question_type="mix"):
    """Ask the Groq chat model for MCQs and subjective questions about *text*.

    Args:
        text: Source material the questions are generated from.
        num_mcqs: Requested number of MCQs; coerced to int and clamped to 0-40.
        num_subjective: Requested number of subjective questions; coerced to
            int and clamped to 0-20.
        difficulty_mcqs: "easy", "medium" or "hard"; any other value is
            treated as "medium".
        difficulty_subjective: Same choices and fallback as difficulty_mcqs.
        question_type: "reason", "short", "long", "case study" or "mix"; any
            other value is treated as "mix".

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. The reply is split at the
        first subjective-questions header; if no header is found the whole
        reply is returned as ``mcqs``. On any failure the tuple is
        ``("Error generating questions: <reason>", "")``.
    """
    try:
        # Slider values may arrive as floats; clamp to the supported range.
        num_mcqs = max(0, min(int(num_mcqs), 40))
        num_subjective = max(0, min(int(num_subjective), 20))

        difficulty_levels = {
            "easy": "simple questions with direct answers.",
            "medium": "moderate complexity questions requiring reasoning.",
            "hard": "challenging questions requiring deep understanding."
        }

        question_type_map = {
            "reason": "Generate reasoning-based questions.",
            "short": "Generate short-answer questions.",
            "long": "Generate long-answer questions.",
            "case study": "Generate case study-based questions.",
            "mix": "Generate a mix of question types."
        }

        # Unknown keys fall back to the full description sentence, not to the
        # bare key, so the prompt always reads as a proper instruction.
        mcq_level = difficulty_levels.get(difficulty_mcqs, difficulty_levels["medium"])
        subjective_level = difficulty_levels.get(difficulty_subjective, difficulty_levels["medium"])
        type_instruction = question_type_map.get(question_type, question_type_map["mix"])

        # The trailing sentence asks for the header the splitter looks for;
        # without it the reply layout is left to the model's discretion.
        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions from the following text: {text}. "
            f"Include the correct answers for each question. "
            f"The questions should be {mcq_level} for MCQs and {subjective_level} for Subjective questions. "
            f"{type_instruction}"
            " List all multiple choice questions first, then begin the subjective section with a line containing exactly **Subjective Questions**."
        )

        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )

        response = chat_completion.choices[0].message.content.strip()

        return _split_question_sections(response)

    except Exception as e:
        return f"Error generating questions: {e}", ""

# Gradio Interface Function
def process_and_generate(file_paths, raw_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type):
    """Gradio callback: merge uploaded-file text with raw text and generate questions.

    Args:
        file_paths: Paths of uploaded files, or ``None``/empty when nothing
            was uploaded.
        raw_text: Text typed by the user; ``None`` is treated as empty.
        num_mcqs: Number of MCQs to request.
        num_subjective: Number of subjective questions to request.
        difficulty_mcqs: Difficulty key for the MCQs.
        difficulty_subjective: Difficulty key for the subjective questions.
        question_type: Question-type key.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings for the two output boxes.
        When neither source contains non-whitespace text, both elements are
        the "No text provided" message; when generation raises, both carry
        the error message.
    """
    sources = []

    # Extract text from uploaded files
    if file_paths:
        extracted_text = process_files(file_paths)
        if extracted_text.strip():
            sources.append(extracted_text)

    # Add raw text if provided; a cleared textbox or API call may pass None.
    raw_text = raw_text or ""
    if raw_text.strip():
        sources.append(raw_text)

    # Every entry in sources holds non-whitespace text, so an empty list is
    # the only way to end up with nothing to send to the model.
    if not sources:
        return "No text provided to generate questions.", "No text provided to generate questions."

    combined_text = "\n".join(sources)

    try:
        mcqs, subjective = generate_questions(
            combined_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type
        )
        return mcqs, subjective
    except Exception as e:
        return f"Error generating questions: {e}", f"Error generating questions: {e}"

# Gradio Inputs and Outputs
# The input components are listed in the same order as the parameters of
# process_and_generate, which receives their values positionally.
inputs = [
    gr.File(
        label="Upload Files (.pdf, .pptx, .docx)",
        file_count="multiple",
        type="filepath",
    ),
    gr.Textbox(
        label="Raw Text",
        lines=3,
        placeholder="Enter raw text here (Optional)...",
    ),
    gr.Slider(
        label="Number of MCQs (Max 40)",
        minimum=2,
        maximum=40,
        step=1,
        value=5,
    ),
    gr.Slider(
        label="Number of Subjective Questions (Max 20)",
        minimum=2,
        maximum=20,
        step=1,
        value=2,
    ),
    gr.Radio(
        choices=["easy", "medium", "hard"],
        label="Select Difficulty Level for MCQs",
        value="medium",
    ),
    gr.Radio(
        choices=["easy", "medium", "hard"],
        label="Select Difficulty Level for Subjective Questions",
        value="medium",
    ),
    gr.Radio(
        choices=["reason", "short", "long", "case study", "mix"],
        label="Select Type of Question",
        value="mix",
    ),
]

# One text box per element of the (mcqs, subjective) tuple returned by the
# callback.
outputs = [
    gr.Textbox(label="Generated MCQs", lines=10),
    gr.Textbox(label="Generated Subjective Questions", lines=10),
]

# Gradio Interface
# Built and launched at import time; generation runs only on submit
# (live=False).
demo = gr.Interface(
    fn=process_and_generate,
    inputs=inputs,
    outputs=outputs,
    title="MCQ & Subjective Question Generator",
    live=False,
)
demo.launch()