import numpy as np
import tensorflow as tf
from tensorflow import keras

############ extract detection ################


# alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Detection labels: A-Z plus the A1/A2 and Z1-Z5 variants.
alpha = ['A', 'A1', 'A2', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
         'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
         'Z1', 'Z2', 'Z3', 'Z4', 'Z5']


############    decode_detection     ################


# Labels from old Arabic (letters, diacritics, Arabic-Indic digits and a few
# presentation forms).
characters = [' ', '.', '[', ']', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٔ', 'ٕ', '١', '٢', '٣', '٤', '٥', '٧', '٨', 'ٮ', 'ٯ', 'ٰ', 'ڡ', 'ک', 'ں', 'ی', '۴', '\u202c', 'ﭐ', 'ﺟ', 'ﺣ', 'ﻛ', '�']
characters.sort()

max_length = 132
img_height, img_width = 1056, 64

def label_to_num(label, max_length=max_length):
    """Map a text label to a fixed-length sequence of character indices."""
    # Characters outside the vocabulary are silently skipped.
    label_num = [characters.index(ch) for ch in label if ch in characters]
    # Right-pad (and left-truncate) to max_length; len(characters) + 2 is the pad id.
    return keras.utils.pad_sequences([label_num], maxlen=max_length, dtype='int32',
                                     padding='post', truncating='pre',
                                     value=len(characters) + 2)[0]


def num_to_label(num):
    """Map a sequence of character indices back to text, stopping at padding."""
    ret = ""
    for ch in num:
        if int(ch) == -1 or int(ch) == len(characters) + 2:  # CTC blank / pad symbol
            break
        if 0 <= int(ch) < len(characters):
            ret += characters[int(ch)]
    return ret
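# A minimal round-trip sketch (the sample string is an arbitrary in-vocabulary
# example, not taken from the dataset):
#
#   encoded = label_to_num("الف")    # int32 vector of length max_length,
#                                     # right-padded with len(characters) + 2
#   num_to_label(encoded)             # -> "الف"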



def decode_predictions(pred, greedy=True):
    """Run CTC decoding on raw model output and return the decoded strings."""
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = keras.backend.ctc_decode(pred, input_length=input_len,
                                       greedy=greedy)[0][0][:, :max_length]
    return [num_to_label(res) for res in results]
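# Decoding sketch on a dummy prediction tensor (the timestep and class counts
# are assumptions; a real model emits (batch, timesteps, num_classes)):
#
#   fake_pred = np.random.rand(2, 264, len(characters) + 3).astype('float32')
#   decode_predictions(fake_pred)   # -> list of 2 (likely garbage) strings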


############ dataloader ################


def encode_single_sample(path_dir, label=None):
    """Read a line image, rotate it upright and scale it for the model."""
    img = tf.io.read_file(path_dir)
    img = tf.io.decode_jpeg(img)
    img.set_shape([img.shape[0], img.shape[1], img.shape[-1]])  # pin the static shape
    img = tf.image.rot90(img, k=1)  # rotate 90 degrees so the line runs along the long axis
    img = tf.image.resize(img, [img_height, img_width])
    # img = tf.image.rgb_to_grayscale(img)  # optional grayscale conversion
    img = img / 255.0  # scale pixel values to [0, 1]
    return img
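# Single-image sketch (the path is hypothetical):
#
#   img = encode_single_sample('lines/line_0.jpg')
#   img.shape   # -> (1056, 64, channels), float values in [0, 1]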
    

batch_size = 16

def Loadlines(path_lines):
    """Build a batched, prefetched tf.data pipeline over a list of image paths."""
    test_dataset = tf.data.Dataset.from_tensor_slices(path_lines)
    test_dataset = (
        test_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size, drop_remainder=False)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )
    return test_dataset
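# Usage sketch (hypothetical paths; any list of JPEG line images works):
#
#   batches = Loadlines(['lines/line_0.jpg', 'lines/line_1.jpg'])
#   for batch in batches.take(1):
#       print(batch.shape)   # (<=16, 1056, 64, channels)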


############    load model     ################

def load_model():
    """Load the fine-tuned text-line recognition model."""
    return keras.models.load_model('/home/user/app/text_line_model.h5')
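
# End-to-end inference sketch: the model path is the one hard-coded above and
# `line_paths` is a hypothetical placeholder list of line-image paths.
if __name__ == "__main__":
    line_paths = ['lines/line_0.jpg']  # hypothetical input images
    model = load_model()
    dataset = Loadlines(line_paths)
    pred = model.predict(dataset)      # (num_lines, timesteps, num_classes)
    for text in decode_predictions(pred):
        print(text)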