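# NOTE(review): the next three lines look like web-page residue (author avatar
# caption, commit message, short commit hash); kept verbatim as comments so the
# module remains valid Python.
# yamanavijayavardhan's picture
# printing extracted text18
# 6139662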
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import cv2
import os
import torch
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils import notification_queue, log_print
from all_models import models
def _to_rgb_image(img):
    """Convert an OpenCV-style image array into an RGB PIL image.

    The conversion code is picked from the array's shape so that inputs
    other than 3-channel colour no longer fail inside ``cv2.cvtColor``:

    * 2-D arrays or a single trailing channel -> treated as grayscale
    * 4 trailing channels -> treated as BGRA (assumed channel order,
      mirroring the BGR assumption for 3 channels -- TODO confirm)
    * anything else -> BGR, exactly as before
    """
    shape = getattr(img, "shape", ())
    if len(shape) == 2 or (len(shape) == 3 and shape[2] == 1):
        conversion = cv2.COLOR_GRAY2RGB
    elif len(shape) == 3 and shape[2] == 4:
        conversion = cv2.COLOR_BGRA2RGB
    else:
        conversion = cv2.COLOR_BGR2RGB
    return Image.fromarray(cv2.cvtColor(img, conversion))


def _recognize_image(img, model, processor):
    """Run the TrOCR model on one image and return its text with spaces removed.

    Any exception is propagated to the caller. The intermediate tensors are
    dropped and the CUDA cache is emptied in ``finally`` so that a failed
    image does not keep GPU memory alive.
    """
    pixel_values = None
    generated_ids = None
    try:
        pixel_values = processor(
            _to_rgb_image(img), return_tensors="pt"
        ).pixel_values
        if torch.cuda.is_available():
            pixel_values = pixel_values.to(models.device)

        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        # Every space inside the decoded text is removed; presumably each
        # image holds a single word -- verify against the caller.
        return generated_text.replace(" ", "")
    finally:
        if torch.cuda.is_available():
            del pixel_values
            del generated_ids
            torch.cuda.empty_cache()


def text(image_cv) -> str:
    """Extract text from one image or a list of images with the TrOCR model.

    Args:
        image_cv: A single OpenCV-style image array, or a ``list`` of them.
            Anything that is not a ``list`` is treated as a single image.
            ``None`` entries are skipped with a warning.

    Returns:
        The per-image texts joined by single spaces and stripped of
        leading/trailing whitespace. Images that fail are logged and
        skipped. If an error occurs outside the per-image loop (for example
        while obtaining the model), the error is logged, pushed onto
        ``notification_queue`` and an empty string is returned.
    """
    try:
        # Get model instance from singleton
        model, processor = models.get_trocr_model()

        if not isinstance(image_cv, list):
            image_cv = [image_cv]

        pieces = []
        total_images = len(image_cv)
        log_print(f"Processing {total_images} image(s) for text extraction")

        for index, img in enumerate(image_cv, start=1):
            try:
                log_print(f"Processing image {index}/{total_images}")

                if img is None:
                    log_print(f"Skipping image {index} - Image is None", "WARNING")
                    continue

                cleaned_text = _recognize_image(img, model, processor)
                pieces.append(cleaned_text)
                log_print(
                    f"Successfully extracted text from image {index}: {cleaned_text}"
                )
            except Exception as e:
                # Best effort: one bad image must not abort the whole batch.
                log_print(f"Error processing image {index}: {e}", "ERROR")

        return " ".join(pieces).strip()

    except Exception as e:
        error_msg = f"Error in text function: {e}"
        log_print(error_msg, "ERROR")
        notification_queue.put({
            "type": "error",
            "message": error_msg
        })
        return ""
    finally:
        # Release model reference.
        # NOTE(review): this also runs when get_trocr_model() itself failed;
        # confirm release_trocr_model() tolerates a release without a
        # matching successful acquire.
        models.release_trocr_model()