atz21 committed on
Commit
1091f22
·
verified ·
1 Parent(s): 37398bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -125
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import re
3
  import gradio as gr
4
  import google.generativeai as genai
5
  from markdown_pdf import MarkdownPdf, Section
@@ -22,65 +21,59 @@ Your objective is to align three sources per question/sub-question:
22
  ## Question X [and sub-question if applicable, e.g., ### (b)(ii)]
23
  *QP:* [Exact question text or [Not found]]
24
  *MS:* [Relevant markscheme section or [Not found]]
25
- *AS:* [Final cleaned student answer; use fenced code for mathematics; insert [illegible] or [No response] as required]
26
  ---
27
  3. Formatting requirements:
28
  - Use '##' for main questions, '###' for sub-questions.
29
- - Maintain section order: QP | MS | AS (always in that sequence).
30
  - Enclose all mathematical expressions in Markdown fenced code blocks (``` triple backticks).
31
- - If a diagram/graph is omitted, write [Graph omitted] in its place.
32
- - For unreadable portions of the student's answer, insert [illegible]; if the answer is wholly unreadable, set AS to [illegible].
33
- - If a question is skipped or unanswered, AS must be exactly [No response].
 
34
  - Keep MS annotations (e.g., M1, A1, R1) verbatim.
35
- - Diagrams/graphs are not to be recreated.
36
- - If any QP, MS, or AS content is missing, specify [Not found] for that section.
37
- - Ensure consistency and determinism in formatting so subsequent models can grade directly from this aligned format.
38
- - List all main questions and sub-questions in their original order, clearly denoting sub-questions (e.g., '### (b)(i)', '### (b)(ii)').
39
-
40
- After each alignment action, briefly validate that the content for QP, MS, and AS matches expectations and alignments are correct. If validation fails, self-correct or flag the issue.
41
 
42
  ## Example
43
  ---
44
  ## Question 1
45
- *QP:* Expand (1+x)^3
46
  *MS:* M1 for binomial expansion, A1 for coefficients, A1 for final form
47
- *AS:* x^3 + 3x^2 + 3x + 1
48
  ---
49
- ## Output Format
50
- Generate a single Markdown document. For each (sub-)question, output a structured block exactly in the prescribed format.
51
  """
52
  },
53
-
54
- "GRADING_PROMPT": {
55
- "role": "system",
56
- "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
57
 
58
  ## Grading Checklist
59
  - Assess each question part against the provided markscheme.
60
- - Award marks for correct methods (M), accurate answers (A), and clear reasoning (R) as specified.
61
- - Use Follow Through (FT) for correctly applied subsequent working that uses a previously incorrect answer.
62
- - Clearly indicate any lost marks and provide a concise reason for the deduction.
63
- - Always state both:
64
- 1. **What was wrong** (explain the error).
65
- 2. **What is right** (give the correct value, method, or reasoning from the markscheme).
66
- - Check for alternative valid methods and award marks accordingly.
67
- - Summarize the total marks and classify the types of errors made by the student.
68
- - Ensure the final output adheres to the specified Markdown table format.
69
 
70
  ### Abbreviations:
71
- - **M**: Marks for demonstrating a correct Method.
72
- - **A**: Marks for providing an accurate Answer.
73
- - **R**: Marks for clear Reasoning.
74
- - **AG**: Answer is given in the question—no marks awarded.
75
- - **FT**: Follow Through; award marks when candidates continue with their own previous (possibly incorrect) answers.
76
 
77
  ---
78
  ## Grading Instructions
79
  1. Award marks using official annotations (M1, A1, etc.).
80
  2. A marks generally require valid M marks.
81
- 3. Allow FT unless results are nonsensical.
82
  4. Accept valid alternative forms.
83
- 5. Apply accuracy requirements.
84
  6. Ignore crossed-out work unless requested otherwise.
85
  7. Mark only the first full solution unless otherwise indicated.
86
  8. Assume graphs/diagrams are correct if required.
@@ -94,21 +87,17 @@ Produce a GitHub-flavored Markdown table:
94
 
95
  Rules:
96
  - Each row matches a markable step.
97
- - For blanks, write “(no answer)” and indicate the lost mark(s).
98
- - Lost marks: wrap in red with `<span style="color:red">A0</span>` (or M0, R0) and make Reason column red.
99
  - Awarded marks remain plain text.
100
- - For partial awards (e.g., M1A0A1), only highlight lost marks.
101
- - **When a mark is lost, the Reason column must explain both the error AND the correct answer or method from the markscheme.**
102
-
103
- **New Rule (Per-Question Total):**
104
- - After each question (including all subparts), show marks obtained vs total in square brackets, e.g.:
105
- `[2/4]`
106
 
107
  ---
108
  ### Examiner’s Report
109
- At the end of the grading, provide a summary report in this format:
110
 
111
- Use codes:
112
  - A : All Good
113
  - B : Silly Mistake
114
  - C : Conceptual Error
@@ -120,18 +109,19 @@ Use codes:
120
  | 1 | 6/9 | C |
121
  | 2 | 7/7 | A |
122
  | 3 | 8/14 | D |
123
- | ... | ... | ... |
124
 
125
- At the end, display the grand total like:
126
  `Total: 40/61`
127
 
128
- Optionally, if reasons are available, extend with:
129
 
130
  | Question Number | Marks | Remark | Reason |
131
  |-----------------|-------|--------|--------|
132
 
133
- ⚠️ Do NOT add any "Validation" or meta commentary. End the output after the Examiner’s Report.
134
  """
 
135
  }
136
 
137
  # -------------------- CONFIG --------------------
@@ -146,13 +136,12 @@ def save_as_pdf(text, filename="output.pdf"):
146
 
147
  # ---------- HELPER: Compress PDF ----------
148
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
149
- """Compress PDF using Ghostscript if larger than max_size (default 20MB)."""
150
  if output_path is None:
151
  base, ext = os.path.splitext(input_path)
152
  output_path = f"{base}_compressed{ext}"
153
 
154
  if os.path.getsize(input_path) <= max_size:
155
- return input_path # No compression needed
156
 
157
  try:
158
  gs_cmd = [
@@ -164,8 +153,10 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
164
  ]
165
  subprocess.run(gs_cmd, check=True)
166
  if os.path.getsize(output_path) <= max_size:
 
167
  return output_path
168
  else:
 
169
  return input_path
170
  except Exception as e:
171
  print(f"⚠️ Compression error: {e}")
@@ -174,114 +165,55 @@ def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
174
  # ---------- HELPER: Create Model with Fallback ----------
175
  def create_model():
176
  try:
 
177
  return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
178
  except Exception:
 
179
  return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
180
 
181
- # ---------- NEW: Pretty math conversion ----------
182
- _SUPER_MAP = {
183
- "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴",
184
- "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹",
185
- "+": "⁺", "-": "⁻", "(": "⁽", ")": "⁾",
186
- "n": "ⁿ", "i": "ⁱ", "a": "ᵃ", "b": "ᵇ", "c": "ᶜ", "d": "ᵈ", "e": "ᵉ",
187
- "o": "ᵒ", "r": "ʳ", "t": "ᵗ", "u": "ᵘ", "v": "ᵛ", "w": "ʷ", "x": "ˣ", "y": "ʸ"
188
- }
189
-
190
- def _to_superscript(s: str) -> str:
191
- """Map characters to available Unicode superscripts; fallback to original char if unavailable."""
192
- out = []
193
- for ch in s:
194
- out.append(_SUPER_MAP.get(ch, _SUPER_MAP.get(ch.lower(), ch)))
195
- return "".join(out)
196
-
197
- def pretty_math(text: str) -> str:
198
- """
199
- Convert caret-notation exponents to Unicode superscripts.
200
-
201
- Handles:
202
- x^2, x^{12}, x^(12), 10^4, (3x10^4)^3, and similar patterns.
203
- Only characters with known superscripts are converted (digits, +-(), some letters).
204
- """
205
-
206
- if not text:
207
- return text
208
-
209
- new = text
210
-
211
- # Convert instances like ^{...}
212
- new = re.sub(r'\^\{\s*([^}]+)\s*\}', lambda m: _to_superscript(m.group(1)), new)
213
-
214
- # Convert instances like ^(...)
215
- new = re.sub(r'\^\(\s*([^\)]+)\s*\)', lambda m: _to_superscript(m.group(1)), new)
216
-
217
- # Convert caret followed by a simple integer (e.g., ^12)
218
- new = re.sub(r'\^([+-]?\d+)', lambda m: _to_superscript(m.group(1)), new)
219
-
220
- # Convert caret followed by single non-space token (e.g., x^n)
221
- new = re.sub(r'\^([A-Za-z0-9\+\-\(\)]+)', lambda m: _to_superscript(m.group(1)), new)
222
-
223
- # Replace common scientific notation '3x10^4' -> '3×10⁴' when 'x' is used as multiplication
224
- # Only apply when the 'x' sits between digits and '10' (heuristic)
225
- new = re.sub(r'(\d)\s*[xX]\s*(10)', r'\1×\2', new)
226
-
227
- # Also compact spaced forms: '3 x 10^4' -> '3×10⁴' (keeps previously superscripted exponent)
228
- new = re.sub(r'(\d)\s*×\s*(10)', r'\1×\2', new)
229
-
230
- return new
231
-
232
- # -------------------- PIPELINE: ALIGN + GRADE --------------------
233
  def align_and_grade(qp_file, ms_file, ans_file):
234
  try:
235
- # Step 0: Compress if needed
236
  qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
237
  ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
238
  ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
239
 
240
- # Step 1: Uploads
241
  qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
242
  ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
243
  ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
244
 
245
  model = create_model()
246
 
247
- # Step 2: Alignment (raw)
248
  resp = model.generate_content([
249
  PROMPTS["ALIGNMENT_PROMPT"]["content"],
250
  qp_uploaded,
251
  ms_uploaded,
252
  ans_uploaded
253
  ])
254
- aligned_text_raw = getattr(resp, "text", None)
255
- if not aligned_text_raw and getattr(resp, "candidates", None):
256
- aligned_text_raw = resp.candidates[0].content.parts[0].text
257
-
258
- # Pretty version for display/PDF (does NOT affect the raw text used for grading)
259
- aligned_text_pretty = pretty_math(aligned_text_raw) if aligned_text_raw else aligned_text_raw
260
- aligned_pdf_path = save_as_pdf(aligned_text_pretty or "[No aligned text produced]", "aligned_qp_ms_as.pdf")
261
 
262
- # Step 3: Grading (use raw aligned text as input to grader)
263
  response = model.generate_content([
264
  PROMPTS["GRADING_PROMPT"]["content"],
265
- aligned_text_raw or "[No aligned text produced]"
266
  ])
267
- grading_raw = getattr(response, "text", None)
268
- if not grading_raw and getattr(response, "candidates", None):
269
- grading_raw = response.candidates[0].content.parts[0].text
270
 
271
- # Pretty version of grading output for display/PDF
272
- grading_pretty = pretty_math(grading_raw) if grading_raw else grading_raw
273
  base_name = os.path.splitext(os.path.basename(ans_file))[0]
274
- grading_pdf_path = save_as_pdf(grading_pretty or "[No grading produced]", f"{base_name}_graded.pdf")
275
 
276
- # Return pretty/display versions (raws remain unmodified for internal processing)
277
- return aligned_text_pretty or "", aligned_pdf_path, grading_pretty or "", grading_pdf_path
278
 
279
  except Exception as e:
280
  return f"❌ Error: {e}", None, None, None
281
 
282
  # ---------- GRADIO APP ----------
283
  with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading)") as demo:
284
- gr.Markdown("## LeadIB AI Grading\nUpload Question Paper, Markscheme, and Student Answer Sheet.\nThe system will align and grade automatically. Mathematical exponents (e.g. `x^2`) will be converted to Unicode superscripts (e.g. `x²`) in the displayed text and PDFs.")
285
 
286
  with gr.Row():
287
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")
 
1
  import os
 
2
  import gradio as gr
3
  import google.generativeai as genai
4
  from markdown_pdf import MarkdownPdf, Section
 
21
  ## Question X [and sub-question if applicable, e.g., ### (b)(ii)]
22
  *QP:* [Exact question text or [Not found]]
23
  *MS:* [Relevant markscheme section or [Not found]]
24
+ *AS:* [Final cleaned student answer; use fenced code for mathematics with superscripts/subscripts; insert [illegible] or [No response] as required]
25
  ---
26
  3. Formatting requirements:
27
  - Use '##' for main questions, '###' for sub-questions.
28
+ - Maintain section order: QP | MS | AS.
29
  - Enclose all mathematical expressions in Markdown fenced code blocks (``` triple backticks).
30
+ - Use proper superscripts/subscripts (x² not x^2, H₂O not H2O).
31
+ - If a diagram/graph is omitted, write [Graph omitted].
32
+ - For unreadable portions, insert [illegible].
33
+ - If a question is skipped or unanswered, AS must be [No response].
34
  - Keep MS annotations (e.g., M1, A1, R1) verbatim.
35
+ - Do not recreate diagrams/graphs.
36
+ - If any QP, MS, or AS content is missing, specify [Not found].
37
+ - List all main questions and sub-questions in original order.
38
+ - After each alignment action, validate that QP, MS, and AS match expectations; if not, self-correct.
 
 
39
 
40
  ## Example
41
  ---
42
  ## Question 1
43
+ *QP:* Expand (1+x)³
44
  *MS:* M1 for binomial expansion, A1 for coefficients, A1 for final form
45
+ *AS:* x³ + 3x² + 3x + 1
46
  ---
 
 
47
  """
48
  },
49
+ "GRADING_PROMPT": {
50
+ "role": "system",
51
+ "content": """Developer: You are an official examiner. Apply the following grading rules precisely.
 
52
 
53
  ## Grading Checklist
54
  - Assess each question part against the provided markscheme.
55
+ - Award marks for correct methods (M), accurate answers (A), and clear reasoning (R).
56
+ - Use Follow Through (FT) for correctly applied subsequent working using a previous error.
57
+ - Always state BOTH:
58
+ 1. What was wrong (the error).
59
+ 2. What is right (the correct method/answer from markscheme).
60
+ - Summarize total marks and classify error types.
61
+ - End with an Examiner’s Report table.
 
 
62
 
63
  ### Abbreviations:
64
+ - **M**: Method
65
+ - **A**: Accuracy/Answer
66
+ - **R**: Reasoning
67
+ - **AG**: Answer given (no marks)
68
+ - **FT**: Follow Through
69
 
70
  ---
71
  ## Grading Instructions
72
  1. Award marks using official annotations (M1, A1, etc.).
73
  2. A marks generally require valid M marks.
74
+ 3. Allow FT unless result is nonsensical.
75
  4. Accept valid alternative forms.
76
+ 5. Apply accuracy requirements (default 3 s.f. if not stated).
77
  6. Ignore crossed-out work unless requested otherwise.
78
  7. Mark only the first full solution unless otherwise indicated.
79
  8. Assume graphs/diagrams are correct if required.
 
87
 
88
  Rules:
89
  - Each row matches a markable step.
90
+ - For blanks, write “(no answer)” and indicate lost mark(s).
91
+ - Lost marks: wrap in red with `<span style="color:red">A0</span>` (or M0, R0) and make Reason column red. Always also show the correct method/answer.
92
  - Awarded marks remain plain text.
93
+ - For partial awards (M1A0A1), highlight only lost marks.
94
+ - After each question, show total in square brackets: `[2/4]`.
 
 
 
 
95
 
96
  ---
97
  ### Examiner’s Report
98
+ At the very end, provide a summary table:
99
 
100
+ Codes:
101
  - A : All Good
102
  - B : Silly Mistake
103
  - C : Conceptual Error
 
109
  | 1 | 6/9 | C |
110
  | 2 | 7/7 | A |
111
  | 3 | 8/14 | D |
112
+ | | | |
113
 
114
+ Then show total clearly:
115
  `Total: 40/61`
116
 
117
+ Optionally, if reasons are available, extend with:
118
 
119
  | Question Number | Marks | Remark | Reason |
120
  |-----------------|-------|--------|--------|
121
 
122
+ ⚠️ Do NOT add any "Validation" or meta commentary. End the output after Examiner’s Report.
123
  """
124
+ }
125
  }
126
 
127
  # -------------------- CONFIG --------------------
 
136
 
137
  # ---------- HELPER: Compress PDF ----------
138
  def compress_pdf(input_path, output_path=None, max_size=20*1024*1024):
 
139
  if output_path is None:
140
  base, ext = os.path.splitext(input_path)
141
  output_path = f"{base}_compressed{ext}"
142
 
143
  if os.path.getsize(input_path) <= max_size:
144
+ return input_path
145
 
146
  try:
147
  gs_cmd = [
 
153
  ]
154
  subprocess.run(gs_cmd, check=True)
155
  if os.path.getsize(output_path) <= max_size:
156
+ print(f"✅ Compressed {input_path} → {output_path}")
157
  return output_path
158
  else:
159
+ print(f"⚠️ Compression failed to reduce below {max_size/1024/1024} MB")
160
  return input_path
161
  except Exception as e:
162
  print(f"⚠️ Compression error: {e}")
 
165
  # ---------- HELPER: Create Model with Fallback ----------
166
  def create_model():
167
  try:
168
+ print("⚡ Using gemini-2.5-pro model")
169
  return genai.GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
170
  except Exception:
171
+ print("⚡ Falling back to gemini-2.5-flash model")
172
  return genai.GenerativeModel("gemini-2.5-flash", generation_config={"temperature": 0})
173
 
174
+ # ---------- PIPELINE: ALIGN + GRADE ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def align_and_grade(qp_file, ms_file, ans_file):
176
  try:
 
177
  qp_file = compress_pdf(qp_file, "qp_compressed.pdf")
178
  ms_file = compress_pdf(ms_file, "ms_compressed.pdf")
179
  ans_file = compress_pdf(ans_file, "ans_compressed.pdf")
180
 
 
181
  qp_uploaded = genai.upload_file(path=qp_file, display_name="Question Paper")
182
  ms_uploaded = genai.upload_file(path=ms_file, display_name="Markscheme")
183
  ans_uploaded = genai.upload_file(path=ans_file, display_name="Answer Sheet")
184
 
185
  model = create_model()
186
 
 
187
  resp = model.generate_content([
188
  PROMPTS["ALIGNMENT_PROMPT"]["content"],
189
  qp_uploaded,
190
  ms_uploaded,
191
  ans_uploaded
192
  ])
193
+ aligned_text = getattr(resp, "text", None)
194
+ if not aligned_text and resp.candidates:
195
+ aligned_text = resp.candidates[0].content.parts[0].text
196
+ aligned_pdf_path = save_as_pdf(aligned_text, "aligned_qp_ms_as.pdf")
 
 
 
197
 
 
198
  response = model.generate_content([
199
  PROMPTS["GRADING_PROMPT"]["content"],
200
+ aligned_text
201
  ])
202
+ grading = getattr(response, "text", None)
203
+ if not grading and response.candidates:
204
+ grading = response.candidates[0].content.parts[0].text
205
 
 
 
206
  base_name = os.path.splitext(os.path.basename(ans_file))[0]
207
+ grading_pdf_path = save_as_pdf(grading, f"{base_name}_graded.pdf")
208
 
209
+ return aligned_text, aligned_pdf_path, grading, grading_pdf_path
 
210
 
211
  except Exception as e:
212
  return f"❌ Error: {e}", None, None, None
213
 
214
  # ---------- GRADIO APP ----------
215
  with gr.Blocks(title="LeadIB AI Grading (Alignment + Auto-Grading)") as demo:
216
+ gr.Markdown("## LeadIB AI Grading\nUpload Question Paper, Markscheme, and Student Answer Sheet.\nThe system will align and grade automatically.")
217
 
218
  with gr.Row():
219
  qp_file = gr.File(label="Upload Question Paper (PDF)", type="filepath")