# Hugging Face Space status: Runtime error
import cv2
import gradio as gr
import numpy as np
import pytesseract
def _load_grayscale(image):
    """Return *image* as a single-channel array suitable for thresholding.

    Args:
        image: One of
            * a file path string,
            * an object exposing the path as ``.name`` (presumably what
              ``gr.File`` passes on some Gradio versions -- confirm against
              the installed version),
            * a PIL-style image (anything with a ``convert`` method),
            * an array-like of pixels, assumed to be in OpenCV's BGR/BGRA
              channel order when it has a channel axis.

    Returns:
        A 2-D array of grayscale pixel values.

    Raises:
        ValueError: If a path was given but OpenCV could not decode an
            image from it (``cv2.imread`` signals this by returning None).
    """
    if hasattr(image, "convert"):
        # PIL-style object: let it do its own grayscale conversion so the
        # RGB channel order is handled correctly.
        return np.asarray(image.convert("L"))

    path = image if isinstance(image, str) else getattr(image, "name", None)
    if isinstance(path, str):
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            raise ValueError(f"could not read an image from {path!r}")
        return gray

    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        return pixels
    if pixels.ndim == 3 and pixels.shape[2] == 4:
        return cv2.cvtColor(pixels, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)


def extract_text(image):
    """Extract text from an uploaded image using PyTesseract.

    The image is converted to grayscale, binarised with Otsu's threshold,
    and passed to Tesseract in page-segmentation mode 6.

    Args:
        image: The uploaded bill or document. Accepts a file path, a file
            wrapper exposing ``.name``, a PIL-style image, or an OpenCV
            (BGR) pixel array. ``None`` (nothing uploaded) is reported as
            an error string.

    Returns:
        str: The extracted text, or a message starting with ``"Error: "``
        if the input was missing or any processing step failed.
    """
    if image is None:
        return "Error: no image provided"

    # This is the UI boundary: report any failure as text in the output box
    # instead of letting the exception propagate to the web framework.
    try:
        gray = _load_grayscale(image)
        # With THRESH_OTSU the threshold value passed (0) is ignored and
        # computed from the image histogram; [1] is the binarised image.
        thresh = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )[1]
        # --psm 6: treat the page as a single block of text.
        return pytesseract.image_to_string(thresh, config="--psm 6")
    except Exception as e:
        return f"Error: {e}"
# Build the web UI: one file-upload input wired to extract_text, with the
# recognised text displayed in a textbox.
iface = gr.Interface(
    title="Text Extractor for Bills and Documents",
    description="Upload your bill or document to extract the text content.",
    fn=extract_text,
    inputs=gr.File(label="Upload Bill/Document (Image)"),
    outputs=gr.Textbox(label="Extracted Text"),
)

# Start serving the interface.
iface.launch()