"""Gradio app that extracts text from an uploaded bill or document image."""

import os

import cv2
import gradio as gr
import numpy as np
import pytesseract


def _load_image(image):
    """Return *image* as a NumPy pixel array that OpenCV can process.

    Accepts an array (returned unchanged), a filesystem path, an object
    whose ``name`` attribute is a path (an uploaded temp file), or a
    PIL-style image exposing ``convert``.

    Raises:
        ValueError: If nothing was provided or the file is not a readable
            image.
        TypeError: If the input is of an unsupported type.
    """
    if image is None:
        raise ValueError("no image was provided")
    if isinstance(image, np.ndarray):
        return image
    if isinstance(image, (str, os.PathLike)):
        path = os.fspath(image)
    elif hasattr(image, "convert"):
        # PIL-style image: request 8-bit grayscale directly so the RGB vs.
        # BGR channel-order difference never comes into play.
        return np.asarray(image.convert("L"))
    else:
        # Depending on the Gradio version, gr.File may hand over a temp-file
        # wrapper instead of a plain path; its ``name`` is the path on disk.
        path = getattr(image, "name", None)
        if path is None:
            raise TypeError(f"unsupported input type: {type(image).__name__}")

    pixels = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising.
    if pixels is None:
        raise ValueError(f"could not read an image from {path!r}")
    return pixels


def _to_grayscale(pixels):
    """Return a single-channel version of *pixels* (assumed BGR or BGRA)."""
    if pixels.ndim == 2:
        return pixels  # already single-channel
    if pixels.shape[2] == 4:
        return cv2.cvtColor(pixels, cv2.COLOR_BGRA2GRAY)
    return cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)


def extract_text(image):
    """Extract text from an uploaded image using PyTesseract.

    Args:
        image: The bill or document to read. May be a file path, an
            uploaded-file object exposing ``name``, a PIL image, or an
            OpenCV image array (BGR channel order assumed).

    Returns:
        str: The extracted text, or a message of the form ``"Error: ..."``
        if the image could not be loaded or processed.
    """
    # This function is the UI boundary: report any failure as text in the
    # output box instead of letting the exception escape to the framework.
    try:
        gray = _to_grayscale(_load_image(image))
        # Otsu picks the binarisation threshold automatically, which helps
        # OCR on unevenly exposed scans/photos.
        thresh = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
        )[1]
        # --psm 6: treat the page as a single uniform block of text.
        return pytesseract.image_to_string(thresh, config="--psm 6")
    except Exception as e:
        return f"Error: {e}"


# Gradio interface for user interaction.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.File(label="Upload Bill/Document (Image)"),
    outputs=gr.Textbox(label="Extracted Text"),
    title="Text Extractor for Bills and Documents",
    description="Upload your bill or document to extract the text content.",
)

# Launch the app only when run as a script, so importing this module
# (e.g. to reuse extract_text) does not start a web server.
if __name__ == "__main__":
    iface.launch()