# ocrr / app.py
# Atchyuteswar's picture
# Create app.py
# f335964 verified
# raw
# history blame contribute delete
# No virus
# 1.14 kB
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
def _to_grayscale(image):
    """Return *image* (path, file wrapper, array or PIL image) as a grayscale array."""
    path = None
    if isinstance(image, (str, os.PathLike)):
        path = os.fsdecode(image)
    elif not hasattr(image, "shape") and isinstance(getattr(image, "name", None), str):
        # Older Gradio releases pass a temporary-file wrapper whose ``name``
        # attribute is the path of the uploaded file.
        path = image.name

    rgb_order = False
    if path is not None:
        # cv2.imread signals failure by returning None instead of raising.
        pixels = cv2.imread(path)
        if pixels is None:
            raise ValueError(f"could not read an image from '{path}'")
    elif isinstance(image, np.ndarray):
        # Raw arrays are assumed to follow OpenCV's BGR channel order.
        pixels = image
    else:
        # Anything else (e.g. a PIL Image) is converted via the array
        # protocol; PIL stores colour channels in RGB order.
        pixels = np.asarray(image)
        rgb_order = True

    if pixels.ndim == 2:
        return pixels  # already single-channel

    channels = pixels.shape[2]
    if channels == 1:
        return np.ascontiguousarray(pixels[:, :, 0])
    if channels == 4:
        code = cv2.COLOR_RGBA2GRAY if rgb_order else cv2.COLOR_BGRA2GRAY
    else:
        code = cv2.COLOR_RGB2GRAY if rgb_order else cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(pixels, code)


def extract_text(image):
    """
    Extract the text of an uploaded bill or document image using PyTesseract.

    The Gradio ``File`` input hands this function a file path (or, in older
    Gradio releases, a temporary-file wrapper exposing ``.name``) rather than
    pixel data, so the input is first normalised to a grayscale OpenCV array.

    Args:
        image: One of
            * a ``str`` / ``os.PathLike`` path to an image file,
            * a file-like object whose ``name`` attribute is the file path,
            * an OpenCV/NumPy array (grayscale, BGR or BGRA),
            * a PIL Image (RGB channel order).

    Returns:
        str: The recognised text, or a message starting with ``"Error: "``
        when nothing was uploaded, the file cannot be decoded, or OCR fails.
    """
    if image is None:
        return "Error: No image provided."

    # This is the UI boundary: any failure is reported in the output textbox
    # instead of crashing the request.
    try:
        gray = _to_grayscale(image)
        # Otsu's method picks the binarisation threshold automatically.
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
        # --psm 6: treat the page as a single uniform block of text.
        return pytesseract.image_to_string(thresh, config='--psm 6')
    except Exception as e:
        return f"Error: {e}"  # Informative error message
# Web UI: a single file-upload input wired to extract_text, with the
# recognised text shown in a textbox.
_upload_input = gr.File(label="Upload Bill/Document (Image)")
_text_output = gr.Textbox(label="Extracted Text")

iface = gr.Interface(
    fn=extract_text,
    inputs=_upload_input,
    outputs=_text_output,
    title="Text Extractor for Bills and Documents",
    description="Upload your bill or document to extract the text content.",
)

# Start serving the app.
iface.launch()