import gradio as gr
import pytesseract
import cv2

def _load_image(image):
  """
  Converts any supported upload into a pixel array OpenCV can process.

  Args:
      image: A NumPy array, a PIL-style image (anything with ``convert``),
          a filesystem path, or a file wrapper exposing the path as ``.name``.

  Returns:
      numpy.ndarray: The array as given, a single-channel array for
      PIL-style images, or whatever ``cv2.imread`` decodes for a path.

  Raises:
      TypeError: If the input is none of the supported kinds.
      ValueError: If the path cannot be decoded as an image.
  """
  # Local imports: this helper is the only code in the file that needs them.
  import os
  import numpy as np

  if isinstance(image, np.ndarray):
    return image

  if hasattr(image, "convert"):
    # PIL-style image: "L" mode is single-channel 8-bit, so no colour
    # conversion is needed afterwards.
    return np.asarray(image.convert("L"))

  if isinstance(image, (str, os.PathLike)):
    path = os.fspath(image)
  else:
    # Upload widgets may hand over a temp-file wrapper instead of a path.
    path = getattr(image, "name", None)

  if not isinstance(path, str):
    raise TypeError(f"unsupported image input: {type(image).__name__}")

  loaded = cv2.imread(path)
  if loaded is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise ValueError(f"could not read an image from {path!r}")
  return loaded


def extract_text(image):
  """
  Extracts text from an uploaded image using PyTesseract.

  Args:
      image: The bill or document to read. Accepts an OpenCV image array
          (colour arrays are treated as BGR/BGRA), a PIL Image object, a
          path to an image file, or an uploaded-file wrapper whose ``.name``
          is such a path.

  Returns:
      str: The extracted text from the image, or a message starting with
      "Error: " if the image could not be loaded or processed.
  """
  if image is None:
    # Nothing was uploaded; say so instead of surfacing a library error.
    return "Error: No image provided."

  # Failures are reported as text so the UI textbox always shows something.
  try:
    pixels = _load_image(image)

    # Only multi-channel arrays need a colour conversion; 2-D arrays are
    # already grayscale and cvtColor would reject them.
    if pixels.ndim == 3:
      if pixels.shape[2] == 4:
        conversion = cv2.COLOR_BGRA2GRAY
      else:
        conversion = cv2.COLOR_BGR2GRAY
      gray = cv2.cvtColor(pixels, conversion)
    else:
      gray = pixels

    # Otsu picks the binarisation threshold automatically (the 0 is ignored).
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # --psm 6: treat the page as a single uniform block of text.
    return pytesseract.image_to_string(thresh, config='--psm 6')
  except Exception as e:
    return f"Error: {e}"  # Informative error message

# Gradio interface for user interaction.
# The upload widget is limited to image files because the handler runs OCR,
# which is only meaningful on images.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.File(
        label="Upload Bill/Document (Image)",
        file_types=["image"],
    ),
    outputs=gr.Textbox(label="Extracted Text"),
    title="Text Extractor for Bills and Documents",
    description="Upload your bill or document to extract the text content.",
)

# Launch the Gradio app (starts the local web server).
iface.launch()