Spaces:

prat1003
/

test1

Sleeping

App Files Files Community

test1 / app.py

prat1003

Update app.py

c6daf95 verified 5 months ago

raw

history blame contribute delete

2.83 kB

	import html
	import json
	import re
	from xml.etree.ElementTree import Element, SubElement, tostring

	import gradio as gr
	import pdfplumber

	def extract_questions(text):
	    """Parse numbered questions, their options, and option scores from text.

	    The text is split on ``Q<number>:`` markers. Within each question the part
	    before ``Options:`` is the question text, and the part after it is scanned
	    for options written as ``<letter>) <option text> (<score>)``.

	    Example input::

	        Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)

	    Args:
	        text: Raw text to parse. Empty or ``None`` yields an empty list.

	    Returns:
	        A list with one dict per question. Each dict has the keys
	        ``questiontext`` (question wrapped in ``<p>`` tags), ``questiontype``,
	        ``randomizeopt``, ``marks`` (highest option score, 0 when there are
	        no options), ``options`` (dicts with ``optiontext``, ``score`` as a
	        digit string, and ``img``), ``minscore``, ``hint`` and
	        ``numberofoptions``.
	    """
	    if not text:
	        return []

	    # The lazy ``.*?`` allows option text of any length and stops at the
	    # first "(<digits>)" that follows it; ``\s*`` tolerates zero or more
	    # spaces (or newlines, via re.S) after the option label.
	    option_pattern = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)

	    questions = []
	    # The first piece is whatever precedes the first "Q<n>:" marker, so it
	    # never contains a question.
	    for block in re.split(r'Q\d+:', text)[1:]:
	        head, marker, tail = block.partition('Options:')
	        if marker:
	            q_text = head.strip()
	            # Only look for options after the marker, so something like
	            # "f(x)" in the question text is not mistaken for a label.
	            options_source = tail
	        else:
	            # No marker: treat the whole block as the question text and
	            # still scan it for options.
	            q_text = block.strip()
	            options_source = block

	        options = [
	            {"optiontext": opt_text.strip(), "score": score, "img": ""}
	            for _, opt_text, score in option_pattern.findall(options_source)
	        ]

	        questions.append({
	            "questiontext": f"<p>{q_text}</p>",
	            "questiontype": "single_select",
	            "randomizeopt": False,
	            # The best-scoring option defines the marks for the question.
	            "marks": max((int(opt["score"]) for opt in options), default=0),
	            "options": options,
	            "minscore": "",
	            "hint": "",
	            "numberofoptions": len(options),
	        })
	    return questions

	def read_pdf(file, format_type):
	    """Convert an uploaded question-paper PDF into an HTML or XML snippet.

	    The PDF text must contain ``Total Marks``, ``Time`` and ``Cutoff``, each
	    followed by a number. Questions are parsed with ``extract_questions`` and
	    the result is serialized as JSON inside the chosen wrapper.

	    Args:
	        file: The uploaded file: either a filesystem path or an object with a
	            ``name`` attribute holding the path. ``None`` means nothing was
	            uploaded.
	        format_type: ``"HTML"`` wraps the JSON in ``<pre>``; any other value
	            produces a ``<questiondata>`` element with a CDATA section.

	    Returns:
	        The generated markup as a string, or a plain message when no file
	        was given or a required header field is missing.
	    """
	    if file is None:
	        return "Please upload a PDF file."

	    # Depending on the Gradio version/configuration the upload arrives as a
	    # path string or as a tempfile-like wrapper exposing ``.name``.
	    pdf_path = getattr(file, "name", file)

	    with pdfplumber.open(pdf_path) as pdf:
	        # Join pages with a newline so the last word of one page does not
	        # fuse with the first word of the next.
	        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

	    # All three header fields are mandatory.
	    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
	    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
	    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)

	    if not (totalmarks_match and time_match and cutoff_match):
	        return "PDF must contain Total Marks, Time, and Cutoff."

	    questions = extract_questions(text)

	    data = {
	        "title": "Certification Title",
	        "totalmarks": totalmarks_match.group(1),
	        "time": time_match.group(1),
	        "cutoff": cutoff_match.group(1),
	        "failurl": "",
	        "passurl": "",
	        "sendpassemail": True,
	        # The questions are embedded as a JSON string, not a nested object.
	        "questions": json.dumps({"questions": questions}),
	    }

	    if format_type == "HTML":
	        # The JSON contains "<p>...</p>" and arbitrary PDF text; escape it so
	        # it is shown literally inside <pre> instead of parsed as markup.
	        payload = html.escape(json.dumps(data, indent=2), quote=False)
	        return f"<html><body><pre>{payload}</pre></body></html>"

	    # A literal "]]>" would close the CDATA section early; split it across
	    # two adjacent sections so the XML stays well-formed.
	    payload = json.dumps(data).replace("]]>", "]]]]><![CDATA[>")
	    return f"<questiondata><![CDATA[{payload}]]></questiondata>"

	# Gradio UI: a PDF upload plus an output-format selector, wired to read_pdf.
	# The result is shown as plain text.
	app = gr.Interface(
	    fn=read_pdf,
	    inputs=[
	        gr.File(label="Upload PDF"),
	        gr.Radio(["HTML", "XML"], label="Output Format"),
	    ],
	    outputs="text",
	    title="PDF to HTML/XML Converter",
	)

	# Start the server only when this file is run as a script, so importing the
	# module (e.g. for tests or reuse of the parsers) has no side effects.
	if __name__ == "__main__":
	    app.launch()