test1 / app.py
prat1003's picture
Update app.py
c6daf95 verified
import gradio as gr
import pdfplumber
import re
import json
from xml.etree.ElementTree import Element, SubElement, tostring
# Splits the document on question labels such as "Q1:", "Q2:", ...
_QUESTION_SPLIT_RE = re.compile(r'Q\d+:')
# One answer option: a lowercase letter label ("a)"), the option text, and an
# integer score in parentheses, e.g. "a) 4 (50)". re.S lets option text span
# line breaks.
_OPTION_RE = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)


def extract_questions(text):
    """Extract questions, options, and scores from text.

    Example input: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    Args:
        text: Raw text containing questions introduced by "Q<number>:".
            Each question may be followed by "Options:" and a list of
            options written as "<letter>) <text> (<score>)".

    Returns:
        A list with one dict per question. ``questiontext`` is the question
        stem wrapped in ``<p>``; ``options`` is a list of dicts with
        ``optiontext``, ``score`` (the digits as a string) and ``img``;
        ``marks`` is the highest option score as an int (0 when there are
        no options); ``numberofoptions`` is ``len(options)``.
    """
    questions = []
    # Element 0 is whatever precedes the first "Qn:" label, so skip it.
    for block in _QUESTION_SPLIT_RE.split(text)[1:]:
        stem, marker, option_section = block.partition("Options:")
        if not marker:
            # No "Options:" marker: the whole block is the question text and
            # is also the only place options could be, so scan all of it.
            stem = block
            option_section = block

        # Only the text after "Options:" is scanned, so a stem such as
        # "Pick a) or b) below (2)" is not mistaken for an answer option.
        options = [
            {
                "optiontext": opt_text.strip(),
                "score": score,
                "img": "",
            }
            for _label, opt_text, score in _OPTION_RE.findall(option_section)
        ]

        questions.append({
            "questiontext": f"<p>{stem.strip()}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(opt["score"]) for opt in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions
def read_pdf(file, format_type):
    """Convert an uploaded PDF question paper into an HTML or XML string.

    The PDF text must contain "Total Marks", "Time" and "Cutoff", each
    followed by a number. Questions are parsed with ``extract_questions``.

    Args:
        file: The uploaded file, either an object exposing a ``.name`` path
            attribute or a plain path; ``None`` when nothing was uploaded.
        format_type: "HTML" for an HTML page showing the JSON payload; any
            other value yields the XML form.

    Returns:
        The generated HTML or XML document as a string, or a human-readable
        message when no file was given or the required fields are missing.
    """
    # Function-scope import: the HTML branch needs escaping and the file's
    # top-level imports do not provide it.
    from html import escape

    if file is None:
        return "Please upload a PDF file."

    # Accept both a file-like upload object (path in ``.name``) and a bare
    # path string.
    pdf_path = getattr(file, "name", file)
    with pdfplumber.open(pdf_path) as pdf:
        # Join pages with a newline so the last token of one page is not
        # fused with the first token of the next.
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    # Check for totalmarks, time, cutoff
    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)
    if not (totalmarks_match and time_match and cutoff_match):
        return "PDF must contain Total Marks, Time, and Cutoff."

    data = {
        "title": "Certification Title",
        "totalmarks": totalmarks_match.group(1),
        "time": time_match.group(1),
        "cutoff": cutoff_match.group(1),
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        # The question list is embedded as a JSON string inside the payload.
        "questions": json.dumps({"questions": extract_questions(text)}),
    }

    if format_type == "HTML":
        # The payload always contains "<p>" markup plus arbitrary PDF text;
        # escape it so the <pre> block displays the JSON literally instead
        # of having the markup interpreted.
        payload = escape(json.dumps(data, indent=2), quote=False)
        return f"<html><body><pre>{payload}</pre></body></html>"

    # "]]>" would terminate the CDATA section early; split it across two
    # adjacent sections so the payload survives intact.
    payload = json.dumps(data).replace("]]>", "]]]]><![CDATA[>")
    return f"<questiondata><![CDATA[{payload}]]></questiondata>"
# Gradio UI: a PDF upload and an output-format selector are passed to
# read_pdf, and the string it returns is shown as text.
app = gr.Interface(
    title="PDF to HTML/XML Converter",
    fn=read_pdf,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Radio(choices=["HTML", "XML"], label="Output Format"),
    ],
    outputs="text",
)

# Launch the interface as soon as this module is executed.
app.launch()