"""Gradio demo that runs OCR on an uploaded image with a TrOCR model.

The uploaded image is converted to grayscale, contrast-enhanced and
sharpened before being passed to the Hugging Face ``image-to-text``
pipeline.
"""

from typing import Optional

import gradio as gr
from PIL import Image, ImageEnhance, ImageFilter
from transformers import pipeline

# Load the OCR model once at import time so every request reuses it.
ocr_model = pipeline("image-to-text", model="microsoft/trocr-large-printed")


def preprocess_image(image_path: str, *, contrast_factor: float = 2.0) -> Image.Image:
    """Return a grayscale, contrast-enhanced, sharpened copy of an image.

    Args:
        image_path: Path of the image file to read.
        contrast_factor: Factor passed to ``ImageEnhance.Contrast.enhance``;
            the default matches the previously hard-coded level.

    Returns:
        The preprocessed image as a new PIL image.
    """
    # The context manager closes the underlying file handle; ``convert``
    # returns a fully loaded copy, so the result stays usable afterwards.
    with Image.open(image_path) as source:
        grayscale = source.convert("L")

    contrasted = ImageEnhance.Contrast(grayscale).enhance(contrast_factor)
    return contrasted.filter(ImageFilter.SHARPEN)


def recognize_text(image_path: Optional[str]) -> str:
    """Run OCR on the image at ``image_path`` and return the recognized text.

    Args:
        image_path: Path of the uploaded image, or ``None`` when no image
            was provided.

    Returns:
        The ``generated_text`` of the first pipeline result, or an empty
        string when there is no input image or the model returns nothing.
    """
    # Nothing to recognize when the form was submitted without an image.
    if not image_path:
        return ""

    preprocessed_image = preprocess_image(image_path)
    result = ocr_model(preprocessed_image)

    # Guard against an empty result list instead of raising IndexError.
    if not result:
        return ""
    return result[0]["generated_text"]


# Set up the Gradio interface.
interface = gr.Interface(
    fn=recognize_text,
    inputs=gr.Image(type="filepath"),  # Hand the function a file path.
    outputs="text",
    title="OCR with Trocr",
    description="Upload an image to recognize text using the Trocr model.",
)

# Launch the app only when run as a script.
if __name__ == "__main__":
    interface.launch()