"""Gradio demo that reads CAPTCHA images with a fine-tuned TrOCR model.

At import time the script installs its runtime dependencies, loads the
processor and model, tries to download a few example images, and builds the
``trocr_iface`` Gradio interface. Running the file as a script launches it.
"""

import os
import subprocess
import sys

# Bootstrap dependencies before the third-party imports below. Using
# ``sys.executable -m pip`` installs into the interpreter that is actually
# running this script, and the argument list avoids going through a shell.
# ``check=False`` keeps this best-effort: a failed install does not abort here.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--upgrade", "pip"],
    check=False,
)
subprocess.run(
    [sys.executable, "-m", "pip", "install", "transformers", "torch"],
    check=False,
)

import gradio as gr
import requests
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

INFERENCE_PROCESSOR = "microsoft/trocr-base-printed"
INFERENCE_MODEL_NAME = "DunnBC22/trocr-base-printed_captcha_ocr"

processor = TrOCRProcessor.from_pretrained(INFERENCE_PROCESSOR)
model = VisionEncoderDecoderModel.from_pretrained(INFERENCE_MODEL_NAME)

# Image examples from the Captcha Dataset.
# NOTE: these are signed URLs carrying an ``X-Goog-Expires`` parameter, so they
# stop working after a while; the download below therefore tolerates failure.
urls = [
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/sample/2b827.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200825Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=7b0b5a9cb5bc595d44a04e7a0ad2da8659333c76e6576bc06deda7a52f8628e5c72b2a75f6da93076027bb1aa6963a5bfeae2ba9c21462bd8b1cdb378ebd78658d80481afaf0ae0a7f6459a4622828d807b380b5c08e008697856cf5c775418ad2324fdcbcbdc607cf434566bf897cb09b78fc51fa1b580fd328bec4170d9ab311d703cd59f059f996a4d0bf43d4c342823e5d4200681973add8dc8842002c2bad8b36f8fe7992b7f8bb3a64ab2355dde9095488799d0164038428a7eb722b55a9debeee6e5e359c7328f8e89dbdabe3315e8ba5bf8144adcd9705016e0ce3d68ccb525c8b83067ba236e1e86904a8a995bba73e61bc27e3e8d6b194b4fa92d7',
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/2g783.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200821Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=63bcf63cb9833ac0f054034b33c91c775234c2dc54919a9664c7ce811539291c32608afe1b52445fbfd1062e0b6a7423c179677ec6998442c13189a02063fd121841200eddc76f6463f02f86f072ad8d6c3fce8dcb239b0c3fcf4aaa549b4ba968ba4f6f28674b4e700d8ed5818e3888ded9a2e41f44342e426bce1a180aaa0c7d4a25b27753154a6be8c9a3df34475fa226843fb3457d1861c7ac915b869a839dacb38c8dfcef4f4e3846c520838727d86bcf6b9540bc7c0600c03378d78389899f7f983bb6c2268de3d24a10b5beddfd3d7b8d6711b3fda86a885335c7df54f081cc47199991d63a412593514435806f2a031e6592451cad6d81b5afe0c86e',
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/4gb3f.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200857Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=253b2c62f52afe4a001ab6b240230b177f99ac868c58c718b6ecaa6b1fca0f39f3027023cda3e8ae5b38b86ad31bbb79cbd2dff74f7ab33ec86d20d940d2829d7640ee133db87742544496e53c97b2c05dd64fc9f9e29bedd451b2c92e4656a744dd63a982e65d65c889b8b204823038cdc7ef6cc2c99c2d30e52c84293cad03c6e15043fb976db03b96b77018ece09a58691cf8216d9cd0bde3bef25d86464772f9d8a3f001da50a75a20c85541202c49ca6ff2f7d8e9b16a8274427d881ca76874dfac819c5123807f341c24f968efefd81593ec0e04fdc600285b9e4085073b8949c1c0b75b1de2dd90d468fe548290fbd0c922bb5b392a617725d7abfecc',
]


def _download_examples(example_urls, timeout=30):
    """Download example images and return the local paths that are usable.

    Each URL at position ``idx`` is saved as ``image_<idx>.png`` in the current
    working directory. A failed download is reported on stdout and skipped
    instead of raising, so one dead URL cannot prevent the demo from starting.

    Args:
        example_urls: Iterable of image URLs.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        List of paths of example files that exist on disk after the attempt,
        in the same order as ``example_urls``.
    """
    available = []
    for idx, url in enumerate(example_urls):
        path = f"image_{idx}.png"
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                image = Image.open(response.raw)
                # Decode fully while the connection is open, and before the
                # file on disk is touched, so a bad payload cannot overwrite
                # a previously saved good example.
                image.load()
            image.save(path)
        except (requests.RequestException, OSError) as err:
            # PIL's UnidentifiedImageError is an OSError subclass.
            print(f"Warning: could not fetch example image {idx}: {err}")
        # A file left by an earlier successful run is still a valid example.
        if os.path.exists(path):
            available.append(path)
    return available


def process_image(image):
    """Run OCR on one image and return the recognised text.

    Args:
        image: A ``PIL.Image.Image``, or ``None`` when nothing was uploaded.

    Returns:
        The decoded text of the first generated sequence, or ``""`` when
        ``image`` is ``None``.
    """
    if image is None:
        return ""

    # Prepare the image; convert so palette/greyscale/RGBA inputs are handled.
    pixel_values = processor(
        image.convert("RGB"), return_tensors="pt"
    ).pixel_values

    # Generate with the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # Decode; the batch holds a single image, so take its only entry.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


title = "Interactive Demo: Captcha Dataset"
description = """OrangeFIN Asia CAPTCHA OCR Demo """
examples = [[path] for path in _download_examples(urls)]

# ``gr.Image`` / ``gr.Textbox`` replace the deprecated ``gr.inputs`` and
# ``gr.outputs`` namespaces, which newer Gradio releases no longer provide.
trocr_iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title=title,
    description=description,
    # Pass None rather than an empty list when no example could be fetched.
    examples=examples or None,
)

if __name__ == "__main__":
    trocr_iface.launch(debug=True)