Spaces:

thanthamky
/

tesseract

Sleeping

tesseract / app.py

Thantham

add files

9c5e6eb 3 months ago

No virus

1.16 kB

	import io

	import fitz
	import pytesseract
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from PIL import Image

	# FastAPI application instance; the route decorators below register their handlers on it.
	app = FastAPI()

	# Liveness probe: GET /test answers with a fixed JSON message and touches
	# neither Tesseract nor PyMuPDF.
	@app.get("/test")
	async def test_api():
	    status_payload = {"message": "API is working ..."}
	    return status_payload

	@app.post("/ocr_image")
	async def perform_ocr_image(image: UploadFile = File(...)):
	    """Run Thai + English OCR on an uploaded image.

	    The upload (form field ``image``) is read fully into memory, decoded
	    with Pillow and passed to Tesseract with the ``tha+eng`` languages.

	    Returns a JSON object ``{"text": "<recognised text>"}``.
	    Responds with HTTP 400 when the upload cannot be decoded as an image.
	    """
	    payload = await image.read()

	    try:
	        picture = Image.open(io.BytesIO(payload))
	        # Image.open only parses the header; force the full decode here so
	        # truncated or corrupt files fail inside this handler instead of
	        # surfacing later as an opaque 500 from within the OCR call.
	        picture.load()
	    except OSError as err:
	        # Pillow signals unreadable data with OSError subclasses
	        # (e.g. UnidentifiedImageError). The OCR call is deliberately kept
	        # outside this try so server-side Tesseract failures are not
	        # reported as client errors.
	        raise HTTPException(
	            status_code=400,
	            detail="Uploaded file is not a readable image.",
	        ) from err

	    # NOTE(review): image_to_string is a blocking call inside an async
	    # handler; consider offloading it if request concurrency matters.
	    text = pytesseract.image_to_string(picture, lang='tha+eng')
	    return {"text": text}


	@app.post("/ocr_pdf")
	async def perform_ocr_pdf(image: UploadFile = File(...)):
	    """Run Thai + English OCR on the first page of an uploaded PDF.

	    The upload (form field ``image``) is read fully into memory, its first
	    page is rendered at 2x zoom, and the rendered raster is passed to
	    Tesseract with the ``tha+eng`` languages. Only page 1 is processed.

	    Returns a JSON object ``{"text": "<recognised text>"}``.
	    Responds with HTTP 400 when the upload cannot be opened as a PDF or
	    contains no pages.
	    """
	    pdf_bytes = await image.read()

	    try:
	        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	    except (RuntimeError, ValueError) as err:
	        # NOTE(review): PyMuPDF reports unreadable/empty input via
	        # RuntimeError subclasses (FileDataError, EmptyFileError) in current
	        # releases; ValueError is included for older versions - confirm
	        # against the pinned PyMuPDF version.
	        raise HTTPException(
	            status_code=400,
	            detail="Uploaded file is not a readable PDF.",
	        ) from err

	    # The context manager closes the document even when rendering fails,
	    # so the native MuPDF handle is not leaked per request.
	    with doc:
	        if len(doc) == 0:
	            raise HTTPException(
	                status_code=400,
	                detail="Uploaded PDF contains no pages.",
	            )

	        page = doc[0]  # first page (zero-indexed)

	        # Render above the default resolution to give Tesseract more pixels.
	        zoom = 2
	        scale = fitz.Matrix(zoom, zoom)
	        # alpha=False keeps the buffer at 3 bytes per pixel so it matches
	        # the "RGB" mode handed to Pillow below.
	        pix = page.get_pixmap(matrix=scale, alpha=False)
	        # frombytes copies the samples, so the image stays valid after the
	        # document is closed.
	        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

	    # NOTE(review): image_to_string is a blocking call inside an async
	    # handler; consider offloading it if request concurrency matters.
	    text = pytesseract.image_to_string(img, lang='tha+eng')
	    return {"text": text}