import logging
import os
import tempfile
import threading
from multiprocessing.pool import Pool

import fitz  # PyMuPDF, imported as fitz for backward compatibility reasons
import gradio as gr
import tesserocr
from PIL import Image

logging.basicConfig(level=logging.INFO)

# Tesseract handles keyed by language code; tesseract_ocr() looks them up by
# the value chosen in the UI. They are created once at import time.
# "pol" reads its trained data from the local ./tessdata directory, while
# "eng" is constructed with tesserocr's defaults.
APIs = dict(
    pol=tesserocr.PyTessBaseAPI(path="./tessdata", lang="pol"),
    eng=tesserocr.PyTessBaseAPI(),
)

def pdf_to_image(pdf_file, path, progress, max_pages):
    """Render the pages of a PDF to PNG files using PyMuPDF.

    Args:
        pdf_file: Object whose ``name`` attribute is the path of the PDF.
        path: Existing directory that receives the ``page-<n>.png`` files
            (``n`` starts at 1).
        progress: Progress tracker exposing
            ``tqdm(iterable, desc=..., total=...)``.
        max_pages: Maximum number of pages to render. ``0``, ``None`` or a
            negative value renders every page; fractional values are
            truncated.

    Returns:
        Paths of the written PNG files, in page order.
    """
    # The limit may arrive as a float or None (e.g. from a numeric UI field),
    # so normalise it to a non-negative int before using it.
    page_limit = max(int(max_pages or 0), 0)
    fnames = []

    # The context manager closes the document (and its file handle) even if
    # rendering a page fails.
    with fitz.open(pdf_file.name) as doc:
        page_count = len(doc)
        # Never report more pages than the document actually has, otherwise
        # the progress bar could not reach 100%.
        total = min(page_limit, page_count) if page_limit else page_count
        tracked_pages = progress.tqdm(
            doc, desc="Converting PDF to image", total=total
        )
        for idx, page in enumerate(tracked_pages, start=1):
            output = f"{path}/page-{idx}.png"
            page.get_pixmap().save(output)
            fnames.append(output)
            if idx >= total:
                break
    return fnames

# The Tesseract handles in APIs are shared by every request, and the app's
# queue processes several requests concurrently. A single handle must not be
# driven by two requests at once, so the SetImage/GetUTF8Text pair is
# serialised behind this lock.
_OCR_LOCK = threading.Lock()


def tesseract_ocr(image, language, max_pages, progress=gr.Progress()):
    """Run OCR over an uploaded PDF and return the path of a text file.

    Args:
        image: Uploaded PDF; an object whose ``name`` attribute is the path
            of the file on disk.
        language: Key into ``APIs`` selecting the Tesseract handle to use.
        max_pages: Maximum number of pages to process. ``0``, ``None`` or a
            negative value processes every page.
        progress: Progress tracker. The ``gr.Progress()`` default is how the
            handler asks Gradio for a tracker; it is not meant to be passed
            by callers.

    Returns:
        Path of a UTF-8 ``.txt`` file holding the recognised text of every
        processed page, joined by newlines. The file is not deleted
        automatically.

    Raises:
        gr.Error: If no file was uploaded.
        KeyError: If ``language`` is not a key of ``APIs``.
    """
    if image is None:
        raise gr.Error("Please upload a PDF file.")

    api = APIs[language]
    # A cleared numeric field yields None and the widget produces floats;
    # normalise to a non-negative int.
    page_limit = max(int(max_pages or 0), 0)

    # The rendered page images are only needed while OCR runs, so they live
    # in a temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as path:
        images = pdf_to_image(image, path, progress, page_limit)
        text_res = []
        for image_path in progress.tqdm(images, desc="Running OCR"):
            with Image.open(image_path) as page_image:
                # Lock per page rather than per document so concurrent
                # requests can interleave instead of waiting for a whole PDF.
                with _OCR_LOCK:
                    api.SetImage(page_image)
                    text_res.append(api.GetUTF8Text())

    # Write UTF-8 explicitly: the locale's default encoding may be unable to
    # represent the recognised characters (e.g. Polish diacritics).
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".txt", encoding="utf-8"
    ) as file:
        file.write("\n".join(text_res))
    return file.name


if __name__ == "__main__":
    logging.info("Starting Tesseract OCR")

    # Widgets are created in the order of tesseract_ocr's positional
    # parameters: the PDF, the OCR language, and the page limit.
    pdf_input = gr.File(label="PDF file")
    language_input = gr.Dropdown(["eng", "pol"], label="Language", value="eng")
    page_limit_input = gr.Number(label="Number of pages", value=0)
    text_output = gr.File(label="Text file", type="file")

    iface = gr.Interface(
        fn=tesseract_ocr,
        inputs=[pdf_input, language_input, page_limit_input],
        outputs=text_output,
        title="PDF to Text Converter",
        description="Converts a PDF file to text using Tesseract OCR.",
    )
    # Enable the request queue before launching.
    iface = iface.queue(concurrency_count=10)

    # Bind to 0.0.0.0 so the server accepts connections on all interfaces.
    iface.launch(server_name="0.0.0.0", server_port=7860)