import gradio as gr
from transformers import pipeline
from PIL import Image, ImageEnhance, ImageFilter

# Build the TrOCR image-to-text pipeline once, at module import, so that
# every call to the handler below reuses the same loaded model.
ocr_model = pipeline(
    task="image-to-text",
    model="microsoft/trocr-large-printed",
)

def preprocess_image(image_path, *, contrast=2.0, sharpen=True):
    """Load an image from disk and prepare it for OCR.

    The image is rotated according to its EXIF orientation tag (if any),
    converted to 8-bit grayscale, contrast-enhanced and optionally sharpened.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        contrast: Factor passed to ``ImageEnhance.Contrast.enhance``;
            ``1.0`` leaves the contrast unchanged. Defaults to ``2.0``.
        sharpen: When true (the default), apply ``ImageFilter.SHARPEN`` as
            the last step.

    Returns:
        A new in-memory grayscale ``PIL.Image.Image``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file
        (e.g. ``FileNotFoundError``, ``PIL.UnidentifiedImageError``).
    """
    # Local import: ImageOps is only needed here and is not part of the
    # file-level import line.
    from PIL import ImageOps

    # The context manager closes the underlying file as soon as the pixel
    # data has been copied out, instead of leaving that to garbage collection.
    with Image.open(image_path) as source:
        # Honor the EXIF orientation tag now, so rotated camera photos are
        # upright before the pixel operations below.
        upright = ImageOps.exif_transpose(source)
        image = upright.convert("L")

    image = ImageEnhance.Contrast(image).enhance(contrast)
    if sharpen:
        image = image.filter(ImageFilter.SHARPEN)
    return image

def recognize_text(image_path):
    """Run OCR on the image at ``image_path`` and return the recognized text.

    Args:
        image_path: Filesystem path of the uploaded image, or ``None`` /
            empty when the request was submitted without an image.

    Returns:
        The ``generated_text`` of the first pipeline result, or an empty
        string when no image was supplied.
    """
    # The UI hands over None when the user submits without uploading an
    # image; return early instead of failing while trying to open it.
    if not image_path:
        return ""

    # Clean the image up first, then feed it to the OCR pipeline.
    preprocessed_image = preprocess_image(image_path)
    result = ocr_model(preprocessed_image)
    return result[0]['generated_text']

# Gradio UI: one image upload wired to recognize_text, with text output.
interface = gr.Interface(
    title="OCR with Trocr",
    description="Upload an image to recognize text using the Trocr model.",
    fn=recognize_text,
    inputs=gr.Image(type="filepath"),  # the handler receives a file path
    outputs="text",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()