Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import fitz # PyMuPDF for handling PDFs | |
| from transformers import AutoModelForVision2Seq, AutoProcessor | |
| import torch | |
| import torchvision | |
| from PIL import Image | |
| import io | |
# Initialize the OCR model and processor from Hugging Face.
# Both are loaded once, at import time, and shared by every call to ocr_pdf.
# NOTE(review): the checkpoint name indicates an FP8-quantized 7B model —
# confirm the target hardware/runtime can actually load it.
model_name = "allenai/olmOCR-2-7B-1025-FP8"
# Processor: turns PIL images into model inputs and decodes generated ids.
processor = AutoProcessor.from_pretrained(model_name)
# Model: the vision-to-sequence network that produces the OCR token ids.
model = AutoModelForVision2Seq.from_pretrained(model_name)
# Function to perform OCR on a PDF, page by page using olmocr
def ocr_pdf(pdf_file):
    """Run the OCR model over every page of an uploaded PDF.

    Args:
        pdf_file: The value delivered by the file-upload input: either an
            object exposing the path as ``.name`` or a plain path string.
            A falsy value (e.g. ``None`` when no file is selected) is
            accepted.

    Returns:
        A single string with one "Page N:" section per page, sections
        separated by a blank line. An empty string when no file is given.
    """
    # Nothing uploaded (or upload cleared): return empty text instead of
    # failing on attribute access.
    if not pdf_file:
        return ""

    # Accept both file-like wrappers (path in `.name`) and bare path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    ocr_results = []  # One formatted entry per page, in page order.

    # The context manager guarantees the document is closed even if OCR of
    # some page raises.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            # Rasterize the page.
            # NOTE(review): default pixmap resolution is used; a higher DPI
            # may be needed for good OCR quality — confirm.
            pix = page.get_pixmap()

            # Round-trip through PNG bytes to obtain a PIL image.
            img = Image.open(io.BytesIO(pix.tobytes("png")))

            # NOTE(review): the processor is called with an image only, no
            # text prompt — confirm this checkpoint accepts that.
            inputs = processor(images=img, return_tensors="pt")

            # Inference only, so gradient tracking is disabled.
            # NOTE(review): no generation length is passed; output length is
            # governed by the model's generation defaults — confirm it is
            # sufficient for a full page.
            with torch.no_grad():
                outputs = model.generate(**inputs)

            # Decode the first (only) generated sequence into text.
            ocr_text = processor.decode(outputs[0], skip_special_tokens=True)

            # Label the text with its 1-based page number.
            ocr_results.append(f"Page {page_num}:\n{ocr_text}")

    # Join all page sections into one string for display.
    return "\n\n".join(ocr_results)
# Gradio interface
def create_gradio_interface():
    """Build the UI: a PDF upload wired to a text box showing the OCR output.

    Returns:
        The ``gr.Blocks`` app, constructed but not launched.
    """
    with gr.Blocks() as demo:
        gr.Markdown("### OCR of PDF Pages using olmocr Model")
        # `type` is deliberately left at the library default: the legacy
        # value "file" is rejected by Gradio 4+ (only "filepath"/"binary"
        # are valid there), while the default in each major version still
        # hands the callback a value exposing the path via `.name`.
        file_input = gr.File(label="Upload PDF")
        output_text = gr.Textbox(label="OCR Results", lines=15)
        # Re-run OCR whenever the uploaded file changes.
        file_input.change(ocr_pdf, inputs=file_input, outputs=output_text)
    return demo
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    create_gradio_interface().launch()