Computer Vision
Collection
Dhivehi Computer Vision: Image classification, OCR, image-to-text, and text-to-image models for Thaana content
•
16 items
•
Updated
A TrOCR model fine-tuned for Dhivehi (Divehi/Maldivian) text recognition, using a DeiT base encoder and a BERT decoder.
The model was trained with the encoder and decoder described above. Example inference code:
from PIL import Image
import torch
from torchvision import transforms
from transformers import (
DeiTImageProcessor,
TrOCRProcessor,
VisionEncoderDecoderModel,
AutoTokenizer
)
class OCRPredictor:
    """Recognise text in an image with a ``VisionEncoderDecoderModel`` checkpoint.

    The constructor loads the model, a ``TrOCRProcessor`` (tokenizer plus
    DeiT image processor) and a torchvision preprocessing pipeline once, so
    that :meth:`predict` can be called repeatedly.

    Attributes are documented with comments in ``__init__``.
    """

    # Hub repository used when the caller does not name a checkpoint.
    DEFAULT_MODEL_NAME = "alakxender/trocr-dv-diet-base-bert"
    # Hub repository whose image-processor configuration is loaded.
    IMAGE_PROCESSOR_NAME = "facebook/deit-base-distilled-patch16-384"

    def __init__(self, model_name=DEFAULT_MODEL_NAME):
        """Load the model, processor and transforms.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model weights and the tokenizer.
        """
        # Use CUDA when torch reports it as available, otherwise the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self._load_model(model_name)
        # Load the tokenizer from the same checkpoint as the model so the
        # decoded vocabulary always matches the generated token ids.
        self.processor = self._load_processor(model_name)
        self.transform = self._get_transforms()

    def _load_model(self, model_name):
        """Load ``model_name`` and move it to ``self.device``."""
        model = VisionEncoderDecoderModel.from_pretrained(model_name)
        return model.to(self.device)

    def _load_processor(self, model_name=None):
        """Build the ``TrOCRProcessor`` used for decoding.

        Args:
            model_name: Checkpoint to load the tokenizer from. When ``None``
                the class default (``DEFAULT_MODEL_NAME``) is used, which
                keeps argument-less calls working as before.
        """
        if model_name is None:
            model_name = self.DEFAULT_MODEL_NAME
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        image_processor = DeiTImageProcessor.from_pretrained(self.IMAGE_PROCESSOR_NAME)
        return TrOCRProcessor(image_processor=image_processor, tokenizer=tokenizer)

    def _get_transforms(self):
        """Return the preprocessing pipeline applied to every input image.

        The image is resized to 384x384, converted to a tensor, and each of
        the three channels is normalised with mean 0.5 and std 0.5.
        """
        return transforms.Compose([
            transforms.Resize((384, 384)),
            transforms.ToTensor(),
            transforms.Normalize([0.5] * 3, [0.5] * 3),
        ])

    def predict(self, image_path) -> str:
        """Return the text recognised in the image at ``image_path``.

        Args:
            image_path: Anything ``PIL.Image.open`` accepts.

        Returns:
            The first generated sequence, decoded with special tokens
            skipped.
        """
        # Open inside a context manager so the underlying file handle is
        # closed as soon as the RGB copy has been made.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # Preprocessing uses self.transform; the processor is only used for
        # decoding below. unsqueeze(0) adds the batch dimension.
        pixel_values = self.transform(image).unsqueeze(0).to(self.device)
        outputs = self.model.generate(
            pixel_values,
            max_length=64,
            num_beams=4,
            early_stopping=True,
            length_penalty=2.0,
            no_repeat_ngram_size=3,
        )
        return self.processor.decode(outputs[0], skip_special_tokens=True)
# Usage: recognise the text in one image file and print the result.
image_path = "ocr2.png"
predictor = OCRPredictor()
text = predictor.predict(image_path)
print(text)  # Sample output: ތިން މިނިސްޓްރީއެއް ހިންގާ މ.ގްރީން ބިލްޑިންގުގައި މިދިޔަ ބުރާސްފަތި ދުވަހު ހިނގި ބޮޑު އަލިފާނުގެ.
[
{
"file_name": "data/images/DV01-04/DV01-04_140.jpg",
"predicted_text": "ޤާނޫނުގެ 42 ވަނަ މާއްދާގައި ލާޒިމްކުރާ މި ރިޕޯޓު ތައްޔާރުކޮށް ފޮނުވުމުގެ ޒިންމާއަކީ ޤާނޫނުން އިދާރާގެ އިންފޮމޭޝަން އޮފިސަރު ކުރައްވަންޖެހޭ ކަމެކެވެ .",
"true_text": "ޤާނޫނުގެ 42 ވަނަ މާއްދާގައި ލާޒިމްކުރާ މި ރިޕޯޓު ތައްޔާރުކޮށް ފޮނުވުމުގެ ޒިންމާއަކީ ޤާނޫނުން އިދާރާގެ އިންފޮމޭޝަން އޮފިސަރު ކުރައްވަންޖެހޭ ކަމެކެވެ."
}
]
Base model
facebook/deit-base-distilled-patch16-384