Spaces:
Running
Running
| from fastapi import FastAPI, UploadFile, File, Form | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from PIL import Image, ImageOps | |
| import io | |
| import os | |
| import base64 | |
| import httpx | |
# FastAPI application and CORS setup.
app = FastAPI()

# NOTE(review): wildcard CORS is wide open — acceptable for a demo Space,
# but lock `allow_origins` down before any production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq API endpoint and credentials; the key comes from the environment
# (e.g. a Hugging Face Space secret) and is None when unset — handlers
# must check for that before calling out.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
def fix_image_orientation(img: Image.Image) -> Image.Image:
    """Normalize a photo's orientation before OCR.

    First applies the EXIF orientation tag (best-effort — silently skipped
    when EXIF data is missing or malformed), then forces the image into
    portrait by rotating any landscape image 90 degrees counter-clockwise
    (PIL's positive angle), expanding the canvas to fit.
    """
    # Best-effort EXIF normalization: many images carry no/broken EXIF.
    try:
        img = ImageOps.exif_transpose(img)
    except Exception:
        pass

    width, height = img.size
    # assumes inputs are portrait documents photographed sideways — a
    # genuinely-landscape document would end up rotated; TODO confirm.
    if width > height:
        img = img.rotate(90, expand=True)
    return img
def resize_for_ocr(img: Image.Image, max_width: int = 1600) -> Image.Image:
    """Downscale *img* to at most ``max_width`` pixels wide, keeping aspect.

    Images already narrow enough are returned untouched. LANCZOS resampling
    keeps text edges crisp after downscaling.
    """
    width, height = img.size
    if width <= max_width:
        return img
    scale = max_width / width
    return img.resize((max_width, int(height * scale)), Image.LANCZOS)
def image_to_base64(img: Image.Image) -> str:
    """Encode *img* as the base64 string of its JPEG bytes (quality 90)."""
    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=90)
    raw = buffer.getvalue()
    return base64.b64encode(raw).decode("utf-8")
# NOTE(review): no route decorator was present, so this health check was
# never registered with FastAPI; restored the conventional GET "/" route
# (the decorator line appears to have been lost from the file).
@app.get("/")
def root():
    """Health-check endpoint reporting that the OCR service is up."""
    return {"status": "OCR running"}
# NOTE(review): no route decorator was present, so this endpoint was never
# reachable; restored a conventional POST route (the decorator line appears
# to have been lost from the file).
@app.post("/ocr")
async def ocr_images(
    file: UploadFile = File(...),
    mode: str = Form("print")
):
    """Run OCR on an uploaded image via Groq's vision chat-completions API.

    Parameters
    ----------
    file : the uploaded image (any format Pillow can open).
    mode : accepted for caller compatibility (default "print") but currently
        unused — the same extraction prompt is sent regardless.
        NOTE(review): presumably intended to select a handwriting-specific
        prompt; confirm intent before wiring it up.

    Returns a dict: ``{"success": True, "text": ...}`` on success, or
    ``{"success": False, "error": ...}`` on any failure — this endpoint
    never raises; all errors are reported in-band.
    """
    try:
        if not GROQ_API_KEY:
            return {"success": False, "error": "GROQ_API_KEY not set in Space secrets."}

        # Normalize the upload: decode, fix orientation, cap width, re-encode.
        contents = await file.read()
        pil_image = Image.open(io.BytesIO(contents)).convert("RGB")
        pil_image = fix_image_orientation(pil_image)
        pil_image = resize_for_ocr(pil_image)
        b64 = image_to_base64(pil_image)

        prompt = (
            "Extract ALL the text from this image exactly as it appears. "
            "Preserve paragraph structure and line breaks. "
            "Do not summarize, translate, or add anything. "
            "Only output the raw extracted text, nothing else."
        )

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(
                GROQ_URL,
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "meta-llama/llama-4-scout-17b-16e-instruct",
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:image/jpeg;base64,{b64}"
                                    }
                                },
                                {
                                    "type": "text",
                                    "text": prompt
                                }
                            ]
                        }
                    ],
                    "max_tokens": 4096,
                    "temperature": 0.0,
                },
            )

        # Groq reports failures as a JSON {"error": {...}} payload, so the
        # body is inspected rather than relying on the HTTP status alone.
        result = response.json()
        if "error" in result:
            error_msg = result["error"].get("message", "Groq vision error")
            print(f"Groq vision error: {error_msg}")
            return {"success": False, "error": error_msg}
        if not result.get("choices"):
            print(f"Groq empty choices: {result}")
            return {"success": False, "error": "No response from vision model"}

        text = result["choices"][0]["message"]["content"].strip()
        # Very short output almost always means the model found no real text.
        if not text or len(text) < 10:
            return {
                "success": False,
                "error": "No text found in image. Try a clearer photo."
            }
        return {"success": True, "text": text}
    except Exception as e:
        # Top-level boundary: surface any failure as an in-band error payload.
        return {"success": False, "error": str(e)}