"""Gradio demo that runs TrOCR handwriting recognition over several uploaded images."""

import os

import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load the processor and model once at import time so every request reuses them.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")


def _upload_path(upload):
    """Return the filesystem path of one uploaded item.

    Accepts either a path (``str`` / ``os.PathLike``) or an object exposing
    the path as ``.name`` (e.g. a temporary-file wrapper).
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def ocr_images(images):
    """Run OCR on every uploaded image.

    Args:
        images: Iterable of uploads (paths or objects with a ``.name`` path),
            or ``None`` when nothing was uploaded.

    Returns:
        dict mapping each file's base name to its recognized text, in upload
        order. Repeated base names get a `` (2)``, `` (3)``, ... suffix so no
        result is overwritten. Empty when ``images`` is ``None`` or empty.
    """
    results = {}
    for upload in images or []:
        path = _upload_path(upload)

        # The model expects RGB input; convert() returns an independent copy,
        # so the underlying file handle can be closed right away.
        with Image.open(path) as source:
            rgb_image = source.convert("RGB")

        pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values
        output_ids = model.generate(pixel_values)
        transcription = processor.batch_decode(output_ids, skip_special_tokens=True)[0]

        # The converted image carries no filename, so key on the upload path.
        base_name = os.path.basename(path)
        key = base_name
        occurrence = 1
        while key in results:
            occurrence += 1
            key = f"{base_name} ({occurrence})"
        results[key] = transcription
    return results


def ocr_interface(images):
    """Gradio callback: OCR the uploads and format the results as one text block.

    Returns one ``"<filename>:\\n<text>"`` entry per image, separated by blank
    lines; an empty string when there are no uploads.
    """
    results = ocr_images(images)
    return "\n\n".join(
        f"{filename}:\n{transcription}" for filename, transcription in results.items()
    )


# Build the Gradio application.
with gr.Blocks() as demo:
    gr.Markdown("## 多图片OCR识别")

    with gr.Row():
        # NOTE(review): type="file" is the Gradio 3.x spelling; newer Gradio
        # releases appear to expect "filepath" instead - confirm against the
        # installed version. ocr_images handles both upload styles.
        image_input = gr.File(label="选择多张图片", file_count="multiple", type="file")
        output_text = gr.Textbox(label="OCR 识别结果")

    # Add the submit button and bind it to the OCR callback.
    submit_button = gr.Button("开始识别")
    submit_button.click(ocr_interface, inputs=image_input, outputs=output_text)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()