import numpy as np
import tensorflow as tf
from tensorflow import keras

############ extractdetection ################


# Class labels, presumably used by the detection stage: A-Z plus the extra
# 'A1'/'A2' and 'Z1'-'Z5' variants (see the usage sketch below).
# alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
alpha = ['A', 'A1', 'A2', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
         'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
         'Z1', 'Z2', 'Z3', 'Z4', 'Z5']
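

# Minimal usage sketch for the label list above: assuming the detection model
# emits per-class scores indexed into `alpha`, a prediction can be mapped back
# to its string label as below. The helper name and the argmax step are
# illustrative assumptions, not part of this module.
def _detection_scores_to_label(class_scores):
    # class_scores: 1-D array of per-class scores for a single detection (assumed shape)
    return alpha[int(np.argmax(class_scores))]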


############    decode_detection     ################


# Older character vocabularies from earlier Old-Arabic label sets, kept commented out for reference:
# characters = [' ', '.', '[', ']', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٔ', 'ٕ', '١', '٢', '٣', '٤', '٥', '٧', '٨', 'ٮ', 'ٯ', 'ٰ', 'ڡ', 'ک', 'ں', 'ی', '۴', '\u202c', 'ﭐ', 'ﺟ', 'ﺣ', 'ﻛ', '�']
#characters = [' ', '.', '[', ']', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٔ', 'ٕ', 'ٖ', '٠', '١', '٢', '٣', '٨', 'ٮ', 'ٰ', 'ڡ', 'ک', 'ں', 'ی', 'ݘ', '\u202c', 'ﭐ', 'ﺣ', 'ﻛ']
characters = [' ', '"', '.', ':', '[', ']', '،', '؟', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', 'ً', 'ٌ', 'ٍ', 'َ', 'ُ', 'ِ', 'ّ', 'ْ', 'ٔ', 'ٕ', 'ٖ', '٠', '١', '٢', '٣', '٨', 'ٮ', 'ٰ', 'ڡ', 'ک', 'ں', 'ی', 'ݘ', '\u202c', 'ﭐ', 'ﺣ', 'ﻛ']
characters.sort()

max_length = 132                  # maximum label length used for padding/truncation
img_height, img_width = 1056, 64  # line images are rotated before resizing, so height > width

def label_to_num(label, max_length=max_length):
    """Map a text label to a fixed-length sequence of character indices."""
    label_num = []
    for ch in label:
        try:
            label_num.append(characters.index(ch))
        except ValueError:
            # skip characters that are not in the vocabulary
            pass

    # pad/truncate to max_length; len(characters) + 2 serves as the padding token
    return keras.utils.pad_sequences(
        [label_num], maxlen=max_length, dtype='int32',
        padding='post', truncating='pre', value=len(characters) + 2)[0]


def num_to_label(num):
    """Map a sequence of character indices back to a text string."""
    ret = ""
    for ch in num:
        # stop at the padding token (or the -1 filler produced by CTC decoding)
        if int(ch) == -1 or int(ch) == len(characters) + 2:
            break
        try:
            ret += characters[int(ch)]
        except IndexError:
            # skip indices outside the character vocabulary
            pass
    return ret
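

# Hedged round-trip sketch for the two helpers above: encode a short string to a
# padded index sequence, then decode it back. The sample text "بسم الله" is an
# arbitrary illustration, not taken from the project data, and the helper name
# is hypothetical.
def _example_label_roundtrip():
    encoded = label_to_num("بسم الله")   # text -> indices padded to max_length
    decoded = num_to_label(encoded)       # indices -> text (stops at the pad token)
    return encoded, decoded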



def decode_predictions(pred, greedy=True):
    """CTC-decode a batch of model outputs into text strings."""
    # every sample uses the full time dimension as its CTC input length
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = keras.backend.ctc_decode(
        pred, input_length=input_len, greedy=greedy)[0][0][:, :max_length]
    return [num_to_label(res) for res in results]
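

# Hedged sketch of decode_predictions on a dummy batch: the model output is
# assumed to be a (batch, time_steps, vocab) array of per-timestep class
# probabilities; the time-step count of 264 and the vocabulary size below are
# placeholder assumptions for illustration only.
def _example_decode_dummy():
    dummy_pred = np.random.rand(1, 264, len(characters) + 3).astype("float32")
    return decode_predictions(dummy_pred)   # list with one (likely garbled) string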


############ dataloader ################


 

def encode_single_sample(path_dir):
    """Read one line image from disk and prepare it for the recognition model."""
    img = tf.io.read_file(path_dir)
    img = tf.io.decode_jpeg(img, channels=3)             # decode as RGB
    img = tf.image.rot90(img, k=1)                       # rotate 90 degrees so the line runs along the height axis
    img = tf.image.resize(img, [img_height, img_width])  # resize to 1056 x 64
    img = img / 255.0                                    # normalize pixel values to [0, 1]
    return img



batch_size = 16

def Loadlines(path_lines):
    """Build a batched, prefetched tf.data pipeline over a list of line-image paths."""
    test_dataset = tf.data.Dataset.from_tensor_slices(path_lines)
    test_dataset = (
        test_dataset.map(
            encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
        )
        .batch(batch_size, drop_remainder=False)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )
    return test_dataset
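

# Hedged usage sketch: building an inference dataset from a hypothetical folder of
# JPEG line images. The directory name "lines/" is an assumption; any list of
# image paths works.
def _example_load_lines(image_dir="lines"):
    import glob
    import os
    paths = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
    return Loadlines(paths)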


############    load model     ################

""" load model_finetuned """
def load_model():
    return keras.models.load_model('/home/user/app/RGB_old_texline_v1.h5')
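

# Hedged end-to-end sketch: load the recognizer, batch a few line images, and print
# the decoded text. The sample file names are hypothetical, and the model path
# hard-coded in load_model() must exist in the deployment environment.
if __name__ == "__main__":
    model = load_model()
    sample_paths = ["line_001.jpg", "line_002.jpg"]   # placeholder image paths
    for batch in Loadlines(sample_paths):
        preds = model.predict(batch)
        for text in decode_predictions(preds):
            print(text)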