import numpy as np
import cv2
from PIL import Image, ImageEnhance
from pdf2image import convert_from_path
import json

from gapi_client import get_genai_client
from utils import extract_json_from_output

# Global GenAI client, set by init_genai() and used by every Gemini call below
CLIENT = None

def init_genai():
    """
    Initialize the global GenAI client. Call this once before any Gemini request.
    """
    global CLIENT
    CLIENT = get_genai_client()


def parse_all_answers(image_input: Image.Image) -> str:
    """
    Extracts answers from a full answer-sheet image using Gemini.
    Returns the raw JSON string from the model.
    """
    output_format = '''
Answer in the following JSON format. Do not write anything else:
{
  "Paper name": {"name": "<paper Alphabet>"},
  "Answers": {
    "1": "<option or text>",
    "2": "<option or text>",
    "3": "<option or text>",
    "4": "<option or text>",
    "5": "<option or text>",
    "6": "<option or text>",
    "7": "<option or text>",
    "8": "<option or text>",
    "9": "<option or text>",
    "10": "<option or text>",
    "11": "<option or text>",
    "12": "<option or text>",
    "13": "<option or text>",
    "14": "<option or text>",
    "15": "<option or text>",
    "16": "<option or text>",
    "17": "<option or text>",
    "18": "<option or text>",
    "19": "<option or text>",
    "20": "<option or text>",
    "21": "<free text answer>",
    "22": "<free text answer>",
    "23": "<free text answer>",
    "24": "<free text answer>",
    "25": "<free text answer>"
  }
}
'''
    prompt = f"""
You are an assistant that extracts answers from an image.
Write only the letter (A, B, C, D, E, or F) of the paper in the \"Paper name\" field.
The image is a screenshot of an answer sheet containing 25 questions.
For questions 1 to 20, the answers are multiple-choice selections.
For questions 21 to 25, the answers are free-text responses.
Extract the answer for each question (1 to 25) and provide the result in JSON using the format below:
{output_format}
"""
    response = CLIENT.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, image_input]
    )
    return response.text


def preprocess_pdf_last_page(image: Image.Image) -> Image.Image:
    """
    Preprocesses the last page PIL image:
      - Convert to OpenCV BGR
      - Mask vertical region
      - Crop to mask
      - Unsharp mask sharpen
      - Enhance with PIL
    """
    # Convert to BGR
    img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    h, w = img_cv.shape[:2]

    # Mask
    mask = np.zeros((h, w), dtype="uint8")
    top, bottom = int(h * 0.14), int(h * 0.73)
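    # The rectangle below keeps a full-width band from 14% to 27% of the page height
    # (y = top .. h - bottom), which is where the candidate header sits on these sheets.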
    cv2.rectangle(mask, (0, top), (w, h - bottom), 255, -1)
    masked = cv2.bitwise_and(img_cv, img_cv, mask=mask)

    # Crop
    coords = cv2.findNonZero(mask)
    x, y, cw, ch = cv2.boundingRect(coords)
    cropped = masked[y:y+ch, x:x+cw]

    # Unsharp mask: weight the original at 1.5 and subtract half the blurred copy
    blurred = cv2.GaussianBlur(cropped, (0, 0), sigmaX=3)
    sharpened = cv2.addWeighted(cropped, 1.5, blurred, -0.5, 0)

    # PIL enhancements
    pil2 = Image.fromarray(cv2.cvtColor(sharpened, cv2.COLOR_BGR2RGB))
    pil2 = ImageEnhance.Sharpness(pil2).enhance(1.3)
    pil2 = ImageEnhance.Contrast(pil2).enhance(1.4)
    pil2 = ImageEnhance.Brightness(pil2).enhance(1.1)
    return pil2


def parse_info_with_gemini(pil_img: Image.Image) -> dict:
    """
    Calls Gemini on a header image to extract candidate info fields.
    """
    output_format = '''
Answer in the following JSON format. Do not write anything else:
{
  "Candidate Info": {
    "Paper": "<paper>",
    "Level": "<level>",
    "Candidate Name": "<name>",
    "Candidate Number": "<number>",
    "School": "<school>",
    "Country": "<country>",
    "grade level": "<grade level>",
    "Date": "<date>"
  }
}
'''
    prompt = f"""
You are a helper that accurately reads a sharpened exam header image and extracts exactly these fields:
  • Paper (e.g. \"B\")
  • Level (e.g. \"MIDDLE PRIMARY\")
  • Candidate Name
  • Candidate Number
  • School
  • Country
  • grade level
  • Date (with time)
Return **only** valid JSON in this format:
{output_format}
"""
    response = CLIENT.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, pil_img]
    )
    return extract_json_from_output(response.text)


def extract_candidate_data(image: Image.Image) -> dict:
    """
    Preprocess last page and parse candidate info.
    """
    prepped = preprocess_pdf_last_page(image)
    info = parse_info_with_gemini(prepped)
    return info


def parse_mcq_answers(pil_image: Image.Image) -> str:
    """
    Extracts MCQ answers 1–10 from an image.
    """
    output_format = '''
Answer in the following JSON format. Do not write anything else:
{
  "Answers": {
    "1": "<option>",
    "2": "<option>",
    "3": "<option>",
    "4": "<option>",
    "5": "<option>",
    "6": "<option>",
    "7": "<option>",
    "8": "<option>",
    "9": "<option>",
    "10": "<option>"
  }
}
'''
    prompt = f"""
You are an assistant that extracts MCQ answers from an image.
The image is a screenshot of a 10-question multiple-choice answer sheet.
Extract which option is marked for each question (1–10) and provide the answers in JSON:
{output_format}
"""
    response = CLIENT.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, pil_image]
    )
    return response.text


def get_mcqs1st(pil_image: Image.Image) -> dict:
    """
    Mask, crop, enhance, and parse MCQs 1–10.
    """
    img_cv = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    h, w = img_cv.shape[:2]
    mask = np.zeros((h, w), dtype="uint8")
    top, bot, right = int(h*0.30), int(h*0.44), int(w*0.35)
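    # Keep the left column (0–35% of the width) between 30% and 56% of the height
    # (y = top .. h - bot); this band holds the marks for questions 1–10.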
    cv2.rectangle(mask, (0, top), (right, h-bot), 255, -1)
    masked = cv2.bitwise_and(img_cv, img_cv, mask=mask)
    coords = cv2.findNonZero(mask)
    x, y, cw, ch = cv2.boundingRect(coords)
    cropped = masked[y:y+ch, x:x+cw]
    blur = cv2.GaussianBlur(cropped, (0,0), sigmaX=3)
    sharp = cv2.addWeighted(cropped, 1.5, blur, -0.5, 0)
    pil_sh = Image.fromarray(cv2.cvtColor(sharp, cv2.COLOR_BGR2RGB))
    pil_sh = ImageEnhance.Sharpness(pil_sh).enhance(1.3)
    pil_sh = ImageEnhance.Contrast(pil_sh).enhance(1.4)
    final = ImageEnhance.Brightness(pil_sh).enhance(1.1)
    raw = parse_mcq_answers(final)
    return extract_json_from_output(raw)


def parse_mcq_answers_11_20(pil_image: Image.Image) -> str:
    """
    Extracts MCQ answers 11–20 from an image.
    """
    output_format = '''
Answer in the following JSON format. Do not write anything else:
{
  "Answers": {
    "11": "<option>",
    "12": "<option>",
    "13": "<option>",
    "14": "<option>",
    "15": "<option>",
    "16": "<option>",
    "17": "<option>",
    "18": "<option>",
    "19": "<option>",
    "20": "<option>"
  }
}
'''
    prompt = f"""
You are an assistant that extracts MCQ answers from an image.
The image is a screenshot of questions 11–20.
Extract the marked option for each and return JSON:
{output_format}
"""
    response = CLIENT.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, pil_image]
    )
    return response.text


def get_mcqs2nd(pil_image: Image.Image) -> dict:
    """
    Mask, crop, enhance, and parse MCQs 11–20.
    """
    img_cv = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    h, w = img_cv.shape[:2]
    mask = np.zeros((h, w), dtype="uint8")
    top, bottom, right = int(h*0.56), int(h*0.21), int(w*0.35)
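    # Keep the left column (0–35% of the width) between 56% and 79% of the height
    # (y = top .. h - bottom); this band holds the marks for questions 11–20.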
    cv2.rectangle(mask, (0, top), (right, h-bottom), 255, -1)
    masked = cv2.bitwise_and(img_cv, img_cv, mask=mask)
    coords = cv2.findNonZero(mask)
    x, y, cw, ch = cv2.boundingRect(coords)
    cropped = masked[y:y+ch, x:x+cw]
    blurred = cv2.GaussianBlur(cropped, (0,0), sigmaX=3)
    sharp = cv2.addWeighted(cropped, 1.5, blurred, -0.5, 0)
    pil_sharp = Image.fromarray(cv2.cvtColor(sharp, cv2.COLOR_BGR2RGB))
    pil_sharp = ImageEnhance.Sharpness(pil_sharp).enhance(1.3)
    pil_sharp = ImageEnhance.Contrast(pil_sharp).enhance(1.4)
    final_pil = ImageEnhance.Brightness(pil_sharp).enhance(1.1)
    raw = parse_mcq_answers_11_20(final_pil)
    return extract_json_from_output(raw)


def parse_text_answers(pil_image: Image.Image) -> str:
    """
    Extracts free-text answers 21–25 from an image.
    """
    output_format = '''
Answer in the following JSON format. Do not write anything else:
{
  "Answers": {
    "21": "<text>",
    "22": "<text>",
    "23": "<text>",
    "24": "<text>",
    "25": "<text>"
  }
}
'''
    prompt = f"""
You are an assistant that extracts free-text answers from an image.
The image shows answers to questions 21–25.
Extract the text for each and return JSON:
{output_format}
"""
    response = CLIENT.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, pil_image]
    )
    return response.text


def get_answer(pil_image: Image.Image) -> dict:
    """
    Mask, crop, enhance, and parse free-text 21–25.
    """
    img_cv = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    h, w = img_cv.shape[:2]
    mask = np.zeros((h, w), dtype="uint8")
    top, bottom = int(h*0.31), int(h*0.31)
    left, right = int(w*0.35), int(w*0.66)
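    # Keep the middle column (35–66% of the width) between 31% and 69% of the height;
    # this region holds the free-text answers for questions 21–25.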
    cv2.rectangle(mask, (left, top), (right, h-bottom), 255, -1)
    masked = cv2.bitwise_and(img_cv, img_cv, mask=mask)
    coords = cv2.findNonZero(mask)
    x, y, cw, ch = cv2.boundingRect(coords)
    cropped = masked[y:y+ch, x:x+cw]
    blurred = cv2.GaussianBlur(cropped, (0,0), sigmaX=3)
    sharp = cv2.addWeighted(cropped, 1.5, blurred, -0.5, 0)
    pil_sharp = Image.fromarray(cv2.cvtColor(sharp, cv2.COLOR_BGR2RGB))
    pil_sharp = ImageEnhance.Sharpness(pil_sharp).enhance(1.3)
    pil_sharp = ImageEnhance.Contrast(pil_sharp).enhance(1.4)
    final_pil = ImageEnhance.Brightness(pil_sharp).enhance(1.1)
    raw = parse_text_answers(final_pil)
    return extract_json_from_output(raw)


def infer_page(pil_image: Image.Image) -> dict:
    """
    Full pipeline for a single exam page.
    """
    student_info = extract_candidate_data(pil_image) or {}
    mcq1 = get_mcqs1st(pil_image) or {}
    mcq2 = get_mcqs2nd(pil_image) or {}
    free_txt = get_answer(pil_image) or {}
    all_answers = {**mcq1.get("Answers", {}), **mcq2.get("Answers", {}), **free_txt.get("Answers", {})}
    return {"Candidate Info": student_info.get("Candidate Info", {}), "Answers": all_answers}


def infer_all_pages(pdf_path: str) -> dict:
    """
    Processes every page in the PDF and infers student data.
    """
    results = {}
    pages = convert_from_path(pdf_path)
    for idx, page in enumerate(pages, start=1):
        data = infer_page(page)
        info = data.get("Candidate Info", {})
        key = info.get("Candidate Number") or f"Page_{idx}"
        if data.get("Answers"):
            results[key] = data
    return results


def load_answer_key(pdf_path: str) -> dict:
    """
    Parses the official answer-key PDF into a dict of paper->answers.
    """
    images = convert_from_path(pdf_path)
    key_dict = {}
    for page in images:
        raw = parse_all_answers(page)
        parsed = extract_json_from_output(raw) or {}
        name = parsed.get("Paper name", {}).get("name")
        if name:
            key_dict[name] = parsed.get("Answers", {})
    return key_dict


def grade_page(student_page_data: dict, answer_key_dict: dict) -> dict:
    """
    Grades a single student page against the loaded key.
    """
    paper = student_page_data.get("Candidate Info", {}).get("Paper")
    correct = answer_key_dict.get(paper, {})
    student_ans = student_page_data.get("Answers", {})
    total_q = len(correct)
    correct_count = 0
    detailed = {}
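    # Compare answers case-insensitively and ignore surrounding whitespace.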
    for q, key_ans in correct.items():
        stud_ans = student_ans.get(q, "")
        is_corr = str(stud_ans).strip().upper() == str(key_ans).strip().upper()
        if is_corr:
            correct_count += 1
        detailed[q] = {"Correct Answer": key_ans, "Student Answer": stud_ans, "Is Correct": is_corr}
    percentage = round(correct_count/total_q*100, 2) if total_q else 0.0
    return {"Candidate Info": student_page_data.get("Candidate Info", {}), "Total Marks": correct_count, "Total Questions": total_q, "Percentage": percentage, "Detailed Results": detailed}


def grade_all_students(answer_key_pdf: str, student_pdf: str, out_json: str = "results.json") -> dict:
    """
    Loads the answer key, infers every student page, grades each one, and writes the results to out_json.
    """
    key_dict = load_answer_key(answer_key_pdf)
    students = infer_all_pages(student_pdf)
    results = {}
    for cand, data in students.items():
        results[cand] = grade_page(data, key_dict)
    with open(out_json, "w") as f:
        json.dump(results, f, indent=2)
    return results
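

# Example entry point (a minimal sketch): the PDF paths below are hypothetical
# placeholders, not files that ship with this module. init_genai() must run
# before any of the Gemini-backed functions are called.
if __name__ == "__main__":
    init_genai()
    results = grade_all_students(
        answer_key_pdf="answer_key.pdf",     # hypothetical path to the official key PDF
        student_pdf="student_sheets.pdf",    # hypothetical path to the scanned answer sheets
        out_json="results.json",
    )
    for candidate, result in results.items():
        print(f'{candidate}: {result["Total Marks"]}/{result["Total Questions"]} ({result["Percentage"]}%)')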