# exam-notes-ocr / main.py
# (Hugging Face Space by Ayan8901 — commit 511b6c3, "Update main.py")
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image, ImageOps
import io
import os
import base64
import httpx
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
def fix_image_orientation(img: Image.Image) -> Image.Image:
try:
img = ImageOps.exif_transpose(img)
except Exception:
pass
w, h = img.size
if w > h:
img = img.rotate(90, expand=True)
return img
def resize_for_ocr(img: Image.Image, max_width: int = 1600) -> Image.Image:
w, h = img.size
if w > max_width:
ratio = max_width / w
img = img.resize((max_width, int(h * ratio)), Image.LANCZOS)
return img
def image_to_base64(img: Image.Image) -> str:
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=90)
return base64.b64encode(buf.getvalue()).decode("utf-8")
@app.get("/")
def root():
return {"status": "OCR running"}
@app.post("/ocr")
async def ocr_images(
file: UploadFile = File(...),
mode: str = Form("print")
):
try:
if not GROQ_API_KEY:
return {"success": False, "error": "GROQ_API_KEY not set in Space secrets."}
contents = await file.read()
pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
pil_image = fix_image_orientation(pil_image)
pil_image = resize_for_ocr(pil_image)
b64 = image_to_base64(pil_image)
prompt = (
"Extract ALL the text from this image exactly as it appears. "
"Preserve paragraph structure and line breaks. "
"Do not summarize, translate, or add anything. "
"Only output the raw extracted text, nothing else."
)
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
GROQ_URL,
headers={
"Authorization": f"Bearer {GROQ_API_KEY}",
"Content-Type": "application/json",
},
json={
"model": "meta-llama/llama-4-scout-17b-16e-instruct",
"messages": [
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{b64}"
}
},
{
"type": "text",
"text": prompt
}
]
}
],
"max_tokens": 4096,
"temperature": 0.0,
},
)
result = response.json()
if "error" in result:
error_msg = result["error"].get("message", "Groq vision error")
print(f"Groq vision error: {error_msg}")
return {"success": False, "error": error_msg}
if not result.get("choices"):
print(f"Groq empty choices: {result}")
return {"success": False, "error": "No response from vision model"}
text = result["choices"][0]["message"]["content"].strip()
if not text or len(text) < 10:
return {
"success": False,
"error": "No text found in image. Try a clearer photo."
}
return {"success": True, "text": text}
except Exception as e:
return {"success": False, "error": str(e)}