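# Spaces: Runtime error
# (The line above appears to be a hosting-page status banner captured with the
# source; it is not part of the program.)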
# Importing necessary packages
import os

import gradio as gr
import torch  # PyTorch used for executing deep learning functions
from PIL import Image, ImageTk  # to display the image from the encoded pixels
# TrOCR processor (the visual feature extractor and tokenizer of the TrOCR model) and the TrOCR model
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Pre-/post-processing for the model: the processor turns an image into
# `pixel_values` and decodes generated token ids back into text.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")

# Use the token from the TOKEN_FROM_SECRET environment variable when it is set
# and non-empty; otherwise fall back to True so the library looks up whatever
# credentials are already stored locally.
auth_token = os.environ.get("TOKEN_FROM_SECRET") or True

# NOTE(review): `use_auth_token` is deprecated in recent `transformers`
# releases in favour of `token`; kept as-is because the installed version is
# not visible from this file — confirm before switching.
model = VisionEncoderDecoderModel.from_pretrained(
    "sk2003/hist-trocr",
    use_auth_token=auth_token,
)
# Earlier variant of the inference function, left commented out:
# def process_image(image):
#     # prepare image
#     pixel_values = processor(image, return_tensors="pt").pixel_values
#     # generate
#     generated_ids = model.generate(pixel_values)
#     # decode
#     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     return generated_text
def inference_on_image(image):
    """Transcribe the text shown in a single image.

    Args:
        image: The input image as a PIL image (the interface is configured
            with ``type="pil"``), or ``None`` when nothing was supplied.

    Returns:
        The decoded transcription as a string, with special tokens removed.
        An empty string is returned when ``image`` is ``None``.
    """
    # The UI can invoke the function without an image; return an empty
    # transcription instead of failing inside the processor.
    if image is None:
        return ""

    # Normalise to 3-channel RGB before feature extraction so grayscale/RGBA
    # inputs are handled too (presumably what the processor expects, as in the
    # TrOCR usage examples — confirm).
    rgb_image = image.convert("RGB")

    pixel_values = processor(rgb_image, return_tensors="pt").pixel_values
    # Cap the output length so a single request cannot generate indefinitely.
    pred = model.generate(pixel_values, max_new_tokens=100)
    # batch_decode returns one string per batch item; there is exactly one here.
    dec_pred = processor.batch_decode(pred, skip_special_tokens=True)[0]
    return dec_pred
# Text shown at the top of the demo page.
title = "Hist-TrOCR"
description = "Interactive demo of Hist-TrOCR, a fine-tuned version of Microsoft's TrOCR which is an end-to-end transformer model used for recognition of text from single-line or word images. It has been fine-tuned on historical text images. Upload an image (or select from the given samples) and click 'Submit' to get the transcription. Results may take a few seconds to show up."

# Build the web UI: one image input (delivered to the function as a PIL image)
# and one text output. The top-level `gr.Image` / `gr.Textbox` components are
# used instead of the legacy `gr.inputs.*` / `gr.outputs.*` namespaces, which
# were deprecated in Gradio 3 and are no longer available in Gradio 4.
iface = gr.Interface(
    fn=inference_on_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title=title,
    # Sample images offered in the UI; paths are relative to the working directory.
    examples=[["309-35.png"], ["270-01-03.png"], ["v211285.b750.s69.jpg"]],
    description=description,
)

# Start the server; debug=True keeps the process attached and prints errors.
iface.launch(debug=True)