|
|
""" |
|
|
Paper: "UTRNet: High-Resolution Urdu Text Recognition In Printed Documents" presented at ICDAR 2023 |
|
|
Authors: Abdur Rahman, Arjun Ghosh, Chetan Arora |
|
|
GitHub Repository: https://github.com/abdur75648/UTRNet-High-Resolution-Urdu-Text-Recognition |
|
|
Project Website: https://abdur75648.github.io/UTRNet/ |
|
|
Copyright (c) 2023-present: This work is licensed under the Creative Commons Attribution-NonCommercial |
|
|
4.0 International License (http://creativecommons.org/licenses/by-nc/4.0/) |
|
|
""" |
|
|
|
|
|
import torch.nn as nn |
|
|
|
|
|
class BidirectionalLSTM(nn.Module):
    """Bidirectional LSTM followed by a per-timestep linear projection.

    Turns a visual feature sequence into a contextual feature sequence of
    the requested width; the sequence length T is preserved.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        input_size  : feature width of each timestep fed to the LSTM
        hidden_size : hidden width per direction
        output_size : feature width after the final linear projection
        """
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        # Two directions are concatenated, hence 2 * hidden_size inputs.
        self.linear = nn.Linear(hidden_size * 2, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        # Repack weights into one contiguous chunk for cuDNN efficiency.
        self.rnn.flatten_parameters()
        contextual, _ = self.rnn(input)
        return self.linear(contextual)
|
|
|
|
|
class LSTM(nn.Module):
    """Single-direction LSTM with a linear head applied at every timestep.

    Sequence length T is preserved; only the feature width changes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        input_size  : feature width of each incoming timestep
        hidden_size : LSTM hidden width
        output_size : feature width produced by the linear head
        """
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        # Repack weights into one contiguous chunk for cuDNN efficiency.
        self.rnn.flatten_parameters()
        hidden_states, _ = self.rnn(input)
        return self.linear(hidden_states)
|
|
|
|
|
class GRU(nn.Module):
    """Single-direction GRU with a linear head applied at every timestep.

    Sequence length T is preserved; only the feature width changes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        input_size  : feature width of each incoming timestep
        hidden_size : GRU hidden width
        output_size : feature width produced by the linear head
        """
        super(GRU, self).__init__()
        self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        # Repack weights into one contiguous chunk for cuDNN efficiency.
        self.rnn.flatten_parameters()
        hidden_states, _ = self.rnn(input)
        return self.linear(hidden_states)
|
|
|
|
|
class MDLSTM(nn.Module):
    """Stack of four unidirectional LSTM blocks with a final linear head.

    The hidden widths form a pyramid (2h -> 4h -> 2h -> h) before the
    output projection; sequence length T is preserved throughout.

    NOTE(review): despite the name, this is a stack of ordinary 1-D
    LSTMs over the time axis, not a true multi-dimensional LSTM —
    confirm the intended architecture against the paper.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        input_size  : feature width of each incoming timestep
        hidden_size : base hidden width of the pyramid
        output_size : feature width produced by the linear head
        """
        super(MDLSTM, self).__init__()
        self.rnn = nn.Sequential(
            LSTM(input_size, hidden_size, 2*hidden_size),
            LSTM(2*hidden_size, hidden_size, 4*hidden_size),
            LSTM(4*hidden_size, hidden_size, 2*hidden_size),
            LSTM(2*hidden_size, hidden_size, hidden_size))
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """
        input : visual feature [batch_size x T x input_size]
        output : contextual feature [batch_size x T x output_size]
        """
        # No explicit flatten_parameters() loop here: each wrapped LSTM
        # already flattens its own weights inside its forward(), so the
        # previous per-layer loop was redundant work.
        recurrent = self.rnn(input)
        output = self.linear(recurrent)
        return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|