Commit 390ca68 by Abdur Rahman (1 parent: 4347569)

Deploy to HuggingFace spaces
Files changed:
- README.md +5 -1
- UrduGlyphs.txt +180 -0
- app.py +65 -0
- model.py +57 -0
- modules/.DS_Store +0 -0
- modules/cnn/.DS_Store +0 -0
- modules/cnn/__pycache__/unet.cpython-310.pyc +0 -0
- modules/cnn/unet.py +116 -0
- modules/dropout_layer.py +16 -0
- modules/feature_extraction.py +11 -0
- modules/prediction.py +81 -0
- modules/sequence_modeling.py +36 -0
- read.py +39 -0
- requirements.txt +11 -0
- utils.py +75 -0
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 title: UrduOCR UTRNet
-emoji:
+emoji: 📖
 colorFrom: red
 colorTo: red
 sdk: gradio
@@ -8,6 +8,10 @@ sdk_version: 4.16.0
 app_file: app.py
 pinned: false
 license: cc-by-nc-sa-4.0
+references:
+  - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
+  - https://abdur75648.github.io/UTRNet/
+  - https://arxiv.org/abs/2306.15782
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

UrduGlyphs.txt ADDED
@@ -0,0 +1,180 @@

ا
آ
ب
پ
ت
ٹ
ث
ج
چ
ح
خ
د
ڈ
ذ
ر
ڑ
ز
ژ
س
ش
ص
ض
ط
ظ
ع
غ
ف
ق
ک
ك
گ
ل
م
ن
ں
و
ہ
ھ
ء
ی
ے
ئ
ۓ
ي
ې
ٿ
ڐ
ڙ
أ
ؤ
ۀ
ۃ
ة
ه
ۂ
ﮥ
ٴ
َ
ً
ُ
ِ
ٍ
ْ
ٗ
ٓ
ٰ
ٖ
٘
ٔ
ّ
ؔ
۰
۱
۲
۳
۴
۵
۶
۷
۸
۹
٪
%
+
=
٤
٫
,
-
_
٥
٬
"
'
/
\
>
<
؍
،
؛
:
؟
{
}
[
]
(
)
.
‘
’
٠
۔
“
”
!
*
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
0
1
2
3
4
5
6
7
8
9
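
A quick way to sanity-check the glyph file is to rebuild the vocabulary string the same way app.py does. This is a rough sketch; it assumes UrduGlyphs.txt sits in the working directory and holds one glyph per line, as in the diff above.

# Sketch: load the glyph file and rebuild the recognizer vocabulary as app.py does.
with open("UrduGlyphs.txt", "r", encoding="utf-8") as f:
    glyphs = [line.strip("\n") for line in f]
print(len(glyphs))               # 180 lines per the diff above
vocab = "".join(glyphs) + " "    # app.py appends a space as the word separator
print(len(vocab))                # expected 181 if every line holds a single glyph
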
app.py ADDED
@@ -0,0 +1,65 @@

import torch
import gradio as gr
from read import text_recognizer
from model import Model
from utils import CTCLabelConverter
from kraken import binarization
from kraken import pageseg as detection_model
from PIL import ImageDraw

""" vocab / character number configuration """
with open("UrduGlyphs.txt", "r", encoding="utf-8") as file:
    content = file.readlines()
content = ''.join([str(elem).strip('\n') for elem in content])
content = content + " "

""" model configuration """
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
converter = CTCLabelConverter(content)
recognition_model = Model(num_class=len(converter.character), device=device)
recognition_model = recognition_model.to(device)
recognition_model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
recognition_model.eval()

examples = ["1.jpg", "2.jpg", "3.jpg"]

input = gr.Image(type="pil", image_mode="RGB", label="Input Image")

def predict(input):
    "Line Detection"
    bw_input = binarization.nlbin(input)
    bounding_boxes = detection_model.segment(bw_input)['boxes']
    bounding_boxes.sort(key=lambda x: x[1])

    "Draw the bounding boxes"
    draw = ImageDraw.Draw(input)
    for box in bounding_boxes:
        draw.rectangle(box, outline='red', width=3)

    "Crop the detected lines"
    cropped_images = []
    for box in bounding_boxes:
        cropped_images.append(input.crop(box))

    "Recognize the text"
    texts = []
    for img in cropped_images:
        texts.append(text_recognizer(img, recognition_model, converter, device))

    "Join the text"
    text = "\n".join(texts)

    "Return the image with bounding boxes and the text"
    return input, text

output_image = gr.Image(type="pil", image_mode="RGB", label="Detected Lines")
output_text = gr.Textbox(label="Recognized Text", interactive=True, show_copy_button=True)

iface = gr.Interface(predict,
                     inputs=input,
                     outputs=[output_image, output_text],
                     title="End-to-End Urdu OCR",
                     description="Demo Web App For UTRNet (https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition)",
                     examples=examples,
                     allow_flagging="never")
iface.launch()

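For local debugging it can be handy to exercise the same detect-then-recognize pipeline without launching the Gradio UI. A minimal sketch, assuming the checkpoint best_norm_ED.pth is available (app.py references it, but it is not part of this commit) and using a hypothetical image name sample.jpg:

# Sketch: run line detection + recognition on one image, headless.
import torch
from PIL import Image
from kraken import binarization, pageseg
from model import Model
from utils import CTCLabelConverter
from read import text_recognizer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with open("UrduGlyphs.txt", "r", encoding="utf-8") as f:
    vocab = "".join(line.strip("\n") for line in f) + " "
converter = CTCLabelConverter(vocab)
model = Model(num_class=len(converter.character), device=device).to(device)
model.load_state_dict(torch.load("best_norm_ED.pth", map_location=device))
model.eval()

page = Image.open("sample.jpg").convert("RGB")       # hypothetical input image
boxes = pageseg.segment(binarization.nlbin(page))['boxes']
boxes.sort(key=lambda b: b[1])                        # top-to-bottom reading order
lines = [text_recognizer(page.crop(b), model, converter, device) for b in boxes]
print("\n".join(lines))
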
model.py ADDED
@@ -0,0 +1,57 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import torch.nn as nn
from modules.dropout_layer import dropout_layer
from modules.sequence_modeling import BidirectionalLSTM
from modules.feature_extraction import UNet_FeatureExtractor

class Model(nn.Module):

    def __init__(self, num_class=181, device='cpu'):
        super(Model, self).__init__()
        self.device = device

        """ FeatureExtraction """
        self.FeatureExtraction = UNet_FeatureExtractor(1, 512)
        self.FeatureExtraction_output = 512
        self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))

        """ Temporal Dropout """
        self.dropout1 = dropout_layer(self.device)
        self.dropout2 = dropout_layer(self.device)
        self.dropout3 = dropout_layer(self.device)
        self.dropout4 = dropout_layer(self.device)
        self.dropout5 = dropout_layer(self.device)

        """ Sequence modeling """
        self.SequenceModeling = nn.Sequential(
            BidirectionalLSTM(self.FeatureExtraction_output, 256, 256),
            BidirectionalLSTM(256, 256, 256))
        self.SequenceModeling_output = 256

        """ Prediction """
        self.Prediction = nn.Linear(self.SequenceModeling_output, num_class)

    def forward(self, input, text=None, is_train=True):
        """ Feature extraction stage """
        visual_feature = self.FeatureExtraction(input)
        visual_feature = self.AdaptiveAvgPool(visual_feature.permute(0, 3, 1, 2))
        visual_feature = visual_feature.squeeze(3)

        """ Temporal Dropout + Sequence modeling stage """
        visual_feature_after_dropout1 = self.dropout1(visual_feature)
        visual_feature_after_dropout2 = self.dropout2(visual_feature)
        visual_feature_after_dropout3 = self.dropout3(visual_feature)
        visual_feature_after_dropout4 = self.dropout4(visual_feature)
        visual_feature_after_dropout5 = self.dropout5(visual_feature)
        contextual_feature1 = self.SequenceModeling(visual_feature_after_dropout1)
        contextual_feature2 = self.SequenceModeling(visual_feature_after_dropout2)
        contextual_feature3 = self.SequenceModeling(visual_feature_after_dropout3)
        contextual_feature4 = self.SequenceModeling(visual_feature_after_dropout4)
        contextual_feature5 = self.SequenceModeling(visual_feature_after_dropout5)
        # Average the five temporal-dropout branches
        contextual_feature = (contextual_feature1 + contextual_feature2 + contextual_feature3 +
                              contextual_feature4 + contextual_feature5) / 5

        """ Prediction stage """
        prediction = self.Prediction(contextual_feature.contiguous())
        return prediction

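The shape contract of Model can be checked with a dummy input matching the comments in modules/cnn/unet.py (a 32x400 grayscale line image). A rough sketch; the num_class value here is illustrative only, since app.py derives the real one from the converter:

# Sketch: trace tensor shapes through the recognizer with random weights.
import torch
from model import Model

net = Model(num_class=182, device='cpu')     # 182 is an assumption for illustration
net.eval()
with torch.no_grad():
    dummy = torch.randn(1, 1, 32, 400)       # [batch, channel, height, width]
    logits = net(dummy)
print(logits.shape)  # torch.Size([1, 400, 182]): one class distribution per horizontal step
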
modules/.DS_Store ADDED
Binary file (6.15 kB)

modules/cnn/.DS_Store ADDED
Binary file (6.15 kB)

modules/cnn/__pycache__/unet.cpython-310.pyc ADDED
Binary file (3.53 kB)

modules/cnn/unet.py ADDED
@@ -0,0 +1,116 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition

import torch
import torch.nn as nn
import torch.nn.functional as F

# Code For UNet Feature Extractor - Source - https://github.com/milesial/Pytorch-UNet
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

class UNet(nn.Module):
    def __init__(self, n_channels=1, n_classes=512):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes

        self.inc = DoubleConv(n_channels, 32)
        self.down1 = Down(32, 64)
        self.down2 = Down(64, 128)
        self.down3 = Down(128, 256)
        self.down4 = Down(256, 512)
        self.up1 = Up(512, 256)
        self.up2 = Up(256, 128)
        self.up3 = Up(128, 64)
        self.up4 = Up(64, 32)
        self.outc = OutConv(32, n_classes)

    def forward(self, x):
        # print(x.shape) # torch.Size([1, 1, 32, 400])
        x1 = self.inc(x)
        # print(x1.shape) # torch.Size([1, 32, 32, 400])
        x2 = self.down1(x1)
        # print(x2.shape) # torch.Size([1, 64, 16, 200])
        x3 = self.down2(x2)
        # print(x3.shape) # torch.Size([1, 128, 8, 100])
        x4 = self.down3(x3)
        # print(x4.shape) # torch.Size([1, 256, 4, 50])
        x5 = self.down4(x4)
        # print(x5.shape) # torch.Size([1, 512, 2, 25])

        # print("Upscaling...")
        x = self.up1(x5, x4)
        # print(x.shape) # torch.Size([1, 256, 4, 50])
        x = self.up2(x, x3)
        # print(x.shape) # torch.Size([1, 128, 8, 100])
        x = self.up3(x, x2)
        # print(x.shape) # torch.Size([1, 64, 16, 200])
        x = self.up4(x, x1)
        # print(x.shape) # torch.Size([1, 32, 32, 400])
        logits = self.outc(x)
        # print(logits.shape) # torch.Size([1, 512, 32, 400])
        return logits

# x = torch.randn(1, 1, 32, 400)
# net = UNet()
# out = net(x)
# print(out.shape)

modules/dropout_layer.py ADDED
@@ -0,0 +1,16 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import torch.nn as nn
import torch
import numpy as np

class dropout_layer(nn.Module):
    def __init__(self, device):
        super(dropout_layer, self).__init__()
        self.device = device

    def forward(self, input):
        nums = (np.random.rand(input.shape[1]) > 0.2).astype(int)
        dummy_array_output = torch.from_numpy(nums).to(self.device)
        dummy_array_output_t = torch.reshape(dummy_array_output, (input.shape[1], 1)).to(self.device)  # Transpose
        dummy_array_output_f = dummy_array_output_t.repeat(input.shape[0], 1, input.shape[2]).to(self.device)  # Same size as input
        output = input * dummy_array_output_f  # element-wise multiplication
        return output

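This layer zeroes whole time-steps (rows of the [batch x T x channels] feature map) with probability 0.2; because the mask is built with NumPy rather than nn.Dropout, it is not switched off by model.eval(). A small sketch of the masking behaviour:

# Sketch: apply the temporal dropout mask to a toy feature map.
import torch
from modules.dropout_layer import dropout_layer

drop = dropout_layer(device='cpu')
features = torch.ones(1, 10, 4)    # [batch x T x channels], all ones for visibility
masked = drop(features)
print(masked[0, :, 0])             # roughly 2 of the 10 time-steps come out as 0
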
modules/feature_extraction.py ADDED
@@ -0,0 +1,11 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import torch.nn as nn
from modules.cnn.unet import UNet

class UNet_FeatureExtractor(nn.Module):
    def __init__(self, input_channel=1, output_channel=512):
        super(UNet_FeatureExtractor, self).__init__()
        self.ConvNet = UNet(input_channel, output_channel)

    def forward(self, input):
        return self.ConvNet(input)

modules/prediction.py ADDED
@@ -0,0 +1,81 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):

    def __init__(self, input_size, hidden_size, num_classes, device):
        super(Attention, self).__init__()
        self.attention_cell = AttentionCell(input_size, hidden_size, num_classes)
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        self.generator = nn.Linear(hidden_size, num_classes)
        self.device = device

    def _char_to_onehot(self, input_char, onehot_dim=38):
        input_char = input_char.unsqueeze(1)
        batch_size = input_char.size(0)
        one_hot = torch.FloatTensor(batch_size, onehot_dim).zero_().to(self.device)
        one_hot = one_hot.scatter_(1, input_char, 1)
        return one_hot

    def forward(self, batch_H, text, is_train=True, batch_max_length=25):
        """
        input:
            batch_H : contextual_feature H = hidden state of encoder. [batch_size x num_steps x contextual_feature_channels]
            text : the text-index of each image. [batch_size x (max_length+1)]. +1 for [GO] token. text[:, 0] = [GO].
        output: probability distribution at each step [batch_size x num_steps x num_classes]
        """
        batch_size = batch_H.size(0)
        num_steps = batch_max_length + 1  # +1 for [s] at end of sentence.

        output_hiddens = torch.FloatTensor(batch_size, num_steps, self.hidden_size).fill_(0).to(self.device)
        hidden = (torch.FloatTensor(batch_size, self.hidden_size).fill_(0).to(self.device),
                  torch.FloatTensor(batch_size, self.hidden_size).fill_(0).to(self.device))

        if is_train:
            for i in range(num_steps):
                # one-hot vectors for the i-th char in the batch
                char_onehots = self._char_to_onehot(text[:, i], onehot_dim=self.num_classes)
                # hidden : decoder's hidden s_{t-1}, batch_H : encoder's hidden H, char_onehots : one-hot(y_{t-1})
                hidden, _ = self.attention_cell(hidden, batch_H, char_onehots)
                output_hiddens[:, i, :] = hidden[0]  # LSTM hidden index (0: hidden, 1: Cell)
            probs = self.generator(output_hiddens)

        else:
            targets = torch.LongTensor(batch_size).fill_(0).to(self.device)  # [GO] token
            probs = torch.FloatTensor(batch_size, num_steps, self.num_classes).fill_(0).to(self.device)

            for i in range(num_steps):
                char_onehots = self._char_to_onehot(targets, onehot_dim=self.num_classes)
                hidden, _ = self.attention_cell(hidden, batch_H, char_onehots)
                probs_step = self.generator(hidden[0])
                probs[:, i, :] = probs_step
                _, next_input = probs_step.max(1)
                targets = next_input

        return probs  # batch_size x num_steps x num_classes


class AttentionCell(nn.Module):

    def __init__(self, input_size, hidden_size, num_embeddings):
        super(AttentionCell, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)  # either i2i or h2h should have bias
        self.score = nn.Linear(hidden_size, 1, bias=False)
        self.rnn = nn.LSTMCell(input_size + num_embeddings, hidden_size)
        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        # [batch_size x num_encoder_step x num_channel] -> [batch_size x num_encoder_step x hidden_size]
        batch_H_proj = self.i2h(batch_H)
        prev_hidden_proj = self.h2h(prev_hidden[0]).unsqueeze(1)
        e = self.score(torch.tanh(batch_H_proj + prev_hidden_proj))  # batch_size x num_encoder_step x 1

        alpha = F.softmax(e, dim=1)
        context = torch.bmm(alpha.permute(0, 2, 1), batch_H).squeeze(1)  # batch_size x num_channel
        concat_context = torch.cat([context, char_onehots], 1)  # batch_size x (num_channel + num_embedding)
        cur_hidden = self.rnn(concat_context, prev_hidden)
        return cur_hidden, alpha

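This attention decoder ships with the upstream UTRNet code but is not wired into model.py above, which uses the linear CTC head instead. A hedged shape check with made-up sizes, in case it is plugged in later:

# Sketch: greedy (is_train=False) decoding pass through the attention head with random weights.
import torch
from modules.prediction import Attention

att = Attention(input_size=256, hidden_size=256, num_classes=100, device='cpu')  # illustrative sizes
batch_H = torch.randn(2, 400, 256)       # contextual features [batch x T x channels]
probs = att(batch_H, text=None, is_train=False, batch_max_length=25)
print(probs.shape)                       # torch.Size([2, 26, 100]): batch_max_length + 1 steps
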
modules/sequence_modeling.py ADDED
@@ -0,0 +1,36 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import torch.nn as nn

class BidirectionalLSTM(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        self.rnn.flatten_parameters()
        recurrent, _ = self.rnn(input)  # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
        output = self.linear(recurrent)  # batch_size x T x output_size
        return output

class LSTM(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        self.rnn.flatten_parameters()
        recurrent, _ = self.rnn(input)  # batch_size x T x input_size -> batch_size x T x hidden_size
        output = self.linear(recurrent)  # batch_size x T x output_size
        return output

read.py ADDED
@@ -0,0 +1,39 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import math
import torch
from PIL import Image
import torch.utils.data
from utils import NormalizePAD

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

def text_recognizer(img_cropped, model, converter, device):
    """ Image processing """
    img = img_cropped.convert('L')
    img = img.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
    w, h = img.size
    ratio = w / float(h)
    if math.ceil(32 * ratio) > 400:
        resized_w = 400
    else:
        resized_w = math.ceil(32 * ratio)
    img = img.resize((resized_w, 32), Image.Resampling.BICUBIC)
    transform = NormalizePAD((1, 32, 400))
    img = transform(img)
    img = img.unsqueeze(0)
    batch_size = 1
    img = img.to(device)

    """ Prediction """
    preds = model(img)
    preds_size = torch.IntTensor([preds.size(1)] * batch_size)
    _, preds_index = preds.max(2)
    preds_str = converter.decode(preds_index.data, preds_size.data)[0]
    return preds_str

# if __name__ == '__main__':
#     image_path = "test.jpg"
#     img_cropped = Image.open(image_path)
#     preds_str = text_recognizer(img_cropped, model, converter, device)  # requires a loaded model, converter and device
#     print(preds_str)

requirements.txt ADDED
@@ -0,0 +1,11 @@

torch>=1.9.1
torchvision>=0.10.1
PyArabic==0.6.15
arabic-reshaper==3.0.0
numpy==1.21.6
Pillow==9.4.0
tqdm==4.65.0
opencv-python==4.5.1.48
opencv-contrib-python==4.5.1.48
kraken==4.3.13
gradio==4.15.0

utils.py ADDED
@@ -0,0 +1,75 @@

# A simplified version of the original code - https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition
import math
import torch
import torchvision.transforms as T

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

class NormalizePAD(object):

    def __init__(self, max_size, PAD_type='right'):
        self.toTensor = T.ToTensor()
        self.max_size = max_size
        self.max_width_half = math.floor(max_size[2] / 2)
        self.PAD_type = PAD_type

    def __call__(self, img):
        img = self.toTensor(img)
        img.sub_(0.5).div_(0.5)
        c, h, w = img.size()
        Pad_img = torch.FloatTensor(*self.max_size).fill_(0)
        Pad_img[:, :, :w] = img  # right pad
        if self.max_size[2] != w:  # add border Pad
            Pad_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, self.max_size[2] - w)

        return Pad_img

class CTCLabelConverter(object):
    """ Convert between text-label and text-index """

    def __init__(self, character):
        # character (str): set of the possible characters.
        dict_character = list(character)

        self.dict = {}
        for i, char in enumerate(dict_character):
            # NOTE: 0 is reserved for 'CTCblank' token required by CTCLoss
            self.dict[char] = i + 1

        self.character = ['[CTCblank]'] + dict_character  # dummy '[CTCblank]' token for CTCLoss (index 0)

    def encode(self, text, batch_max_length=25):
        """convert text-label into text-index.
        input:
            text: text labels of each image. [batch_size]
            batch_max_length: max length of text label in the batch. 25 by default

        output:
            text: text index for CTCLoss. [batch_size, batch_max_length]
            length: length of each text. [batch_size]
        """
        length = [len(s) for s in text]

        # The index used for padding (=0) would not affect the CTC loss calculation.
        batch_text = torch.LongTensor(len(text), batch_max_length).fill_(0)
        for i, t in enumerate(text):
            indices = [self.dict[char] for char in list(t)]
            batch_text[i][:len(indices)] = torch.LongTensor(indices)
        return (batch_text, torch.IntTensor(length))

    def decode(self, text_index, length):
        """ convert text-index into text-label. """
        texts = []
        for index, l in enumerate(length):
            t = text_index[index, :]

            char_list = []
            for i in range(l):
                if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):  # removing repeated characters and blank.
                    char_list.append(self.character[t[i]])
            text = ''.join(char_list)

            texts.append(text)
        return texts

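A tiny round-trip with a made-up three-character vocabulary shows how the converter maps text to CTC indices and back (the Space builds the real vocabulary from UrduGlyphs.txt):

# Sketch: encode/decode round-trip with a toy character set.
from utils import CTCLabelConverter

conv = CTCLabelConverter("ab ")                # index 0 stays reserved for the CTC blank
batch_text, lengths = conv.encode(["ab", "ba b"])
print(batch_text[1][:4])                       # tensor([2, 1, 3, 2]) -> 'b', 'a', ' ', 'b'
print(conv.decode(batch_text, lengths))        # ['ab', 'ba b']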