"""Flask + Gradio front end for TrOCR handwritten-text recognition.

The Flask app exposes ``/capture``, which starts (once) a Gradio interface
and redirects the browser to it. The Gradio interface sends each submitted
image through the ``microsoft/trocr-large-handwritten`` model and shows the
decoded text.
"""

import io

import gradio as gr
import requests
from flask import Flask, redirect, request
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Initialize Flask app
app = Flask(__name__)

# Load the processor and model once at import time so every request reuses them.
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-large-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-large-handwritten')


def recognize_handwritten_text(image):
    """Run TrOCR on a single image and return the decoded text.

    Args:
        image: A PIL image (any mode) or any other input the TrOCR processor
            accepts directly.

    Returns:
        str: The text decoded from the first (only) generated sequence.
    """
    # The TrOCR examples feed RGB images; grayscale/RGBA/palette PIL images
    # are normalised here so the processor always sees three channels.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess the image
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Generate text
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def capture_image(image):
    """Gradio callback: coerce the incoming image to PIL and recognise its text.

    Args:
        image: The image supplied by Gradio. Raw encoded bytes, a PIL image,
            or an array-like accepted by ``Image.fromarray`` (Gradio's
            ``"image"`` input is expected to deliver a NumPy array) are all
            handled. ``None`` is treated as "no image".

    Returns:
        str: The recognised text, or an empty string when no image was given.
    """
    # With live=True the callback may fire with no image (e.g. after clearing).
    if image is None:
        return ""

    if isinstance(image, (bytes, bytearray)):
        # Encoded image file contents (PNG/JPEG/...): let PIL decode them.
        image = Image.open(io.BytesIO(image))
    elif not isinstance(image, Image.Image):
        # Array-like pixel data, the form Gradio's "image" input is expected to use.
        image = Image.fromarray(image)

    # Recognize handwritten text
    return recognize_handwritten_text(image)


# Create Gradio interface
iface = gr.Interface(fn=capture_image, inputs="image", outputs="text", live=True)

# URL of the running Gradio interface; None until the first /capture request.
_gradio_url = None


@app.route('/capture', methods=['GET'])
def capture():
    """Start the Gradio interface on first use and redirect the client to it.

    Returns:
        A Flask redirect response pointing at the Gradio share URL, or at the
        local URL when no share link was created.
    """
    global _gradio_url
    if _gradio_url is None:
        # prevent_thread_lock=True makes launch() return instead of blocking
        # this request thread while the Gradio server runs.
        _, local_url, share_url = iface.launch(share=True, prevent_thread_lock=True)
        _gradio_url = share_url or local_url
    return redirect(_gradio_url)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug debugger and reloader;
    # confirm this script is only ever run for local development.
    app.run(debug=True)