# OCR_Sample_RGB / Utils(2).py
import numpy as np
import tensorflow as tf
from tensorflow import keras
############ extract detection ################
# Class alphabet for the detection step: A-Z plus extra A*/Z* variants.
# alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
alpha = ['A', 'A1', 'A2', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
         'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
         'Z1', 'Z2', 'Z3', 'Z4', 'Z5']
############ decode_detection ################
# labels from KHATT dataset
"""characters = [' ', '!', '"', '#', '%', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '>', '?', '[', '\\', ']', 'x', '\xa0', '×', '،', '؛', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', '–', '‘']
characters.sort()"""
# labels from old Arabic data
characters = [' ', '.', '[', ']', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٔ', 'ٕ', '١', '٢', '٣', '٤', '٥', '٧', '٨', 'ٮ', 'ٯ', 'ٰ', 'ڡ', 'ک', 'ں', 'ی', '۴', '\u202c', 'ﭐ', 'ﺟ', 'ﺣ', 'ﻛ', '�']
# Sort so that character indices are deterministic across runs.
characters.sort()
max_length = 132                    # maximum decoded sequence length
img_height, img_width = 1056, 64    # target (height, width) after the 90-degree rotation
def label_to_num(label, max_length=max_length):
    """Map a text label to a fixed-length sequence of character indices."""
    label_num = []
    for ch in label:
        try:
            label_num.append(characters.index(ch))
        except ValueError:
            # Skip characters that are not in the vocabulary.
            pass
    return keras.utils.pad_sequences([label_num], maxlen=max_length, dtype='int32',
                                     padding='post', truncating='pre',
                                     value=len(characters) + 2)[0]
def num_to_label(num):
    """Map a sequence of character indices back to a text string."""
    ret = ""
    for ch in num:
        # len(characters) + 2 is the padding value used above; -1 is the padding
        # produced by keras.backend.ctc_decode.
        if int(ch) == -1 or ch == len(characters) + 2:
            break
        try:
            ret += characters[int(ch)]
        except IndexError:
            pass
    return ret
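
# Example round trip (a minimal sketch; "كتاب" is an arbitrary sample string):
#   encoded = label_to_num("كتاب")    # -> int32 array of length max_length
#   decoded = num_to_label(encoded)   # -> "كتاب" (padding is stripped)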
def decode_predictions(pred, greedy=True):
    """CTC-decode a batch of model outputs into text strings."""
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = keras.backend.ctc_decode(pred, input_length=input_len,
                                       greedy=greedy)[0][0][:, :max_length]
    output_text = []
    for res in results:
        output_text.append(num_to_label(res))
    return output_text
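
# Usage sketch, assuming `model` is a CTC-trained keras.Model whose output has
# shape (batch, time_steps, num_classes):
#   pred = model.predict(batch_of_images)
#   texts = decode_predictions(pred, greedy=True)   # list of decoded strings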
############ dataloader ################
def encode_single_sample(path_dir, label=None):
    """Read, rotate, resize, and normalize a single line image."""
    img = tf.io.read_file(path_dir)
    img = tf.io.decode_jpeg(img, name=None)
    img.set_shape([img.shape[0], img.shape[1], img.shape[-1]])
    img = tf.image.rot90(img, k=1, name=None)        # rotate the line image 90 degrees
    img = tf.image.resize(img, [img_height, img_width])
    img = tf.image.rgb_to_grayscale(img)             # convert image to grayscale
    img = img / 255.0                                # scale pixel values to [0, 1]
    return img
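
# Sketch of the expected output, assuming `path` points to an RGB JPEG line image:
#   img = encode_single_sample(path)   # float tensor of shape (img_height, img_width, 1)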
batch_size = 16

def Loadlines(path_lines):
    """Build a batched tf.data pipeline over a list of line-image paths."""
    path_lines.sort()
    test_dataset = tf.data.Dataset.from_tensor_slices(path_lines)
    test_dataset = (
        test_dataset.map(
            encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
        )
        .batch(batch_size, drop_remainder=False)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )
    return test_dataset
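
# Usage sketch with hypothetical file paths:
#   paths = ["lines/line_001.jpg", "lines/line_002.jpg"]
#   ds = Loadlines(paths)   # batches of shape (batch, img_height, img_width, 1)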
############ load model ################
def load_model():
    """Load the fine-tuned text-line recognition model."""
    return keras.models.load_model('/home/user/app/text_line_model.h5')
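
# End-to-end inference sketch (paths and variable names are illustrative only):
#   model = load_model()
#   ds = Loadlines(["lines/line_001.jpg"])
#   for batch in ds:
#       print(decode_predictions(model.predict(batch)))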