import torch
import torch.nn as nn
import numpy as np
import random
import enum
import traceback
import os
import sys
import json

F_DIR = os.path.dirname(os.path.realpath(__file__))


class XlitError(enum.Enum):
    lang_err = "Unsupported language ID requested ;( Please check available languages."
    string_err = "String passed is incompatible ;("
    internal_err = "Internal crash ;("
    unknown_err = "Unknown Failure"
    loading_err = "Loading failed ;( Check if metadata/paths are correctly configured."


class Encoder(nn.Module):
    """
    Simple RNN-based encoder network
    """

    def __init__(
        self,
        input_dim,
        embed_dim,
        hidden_dim,
        rnn_type="gru",
        layers=1,
        bidirectional=False,
        dropout=0,
        device="cpu",
    ):
        super(Encoder, self).__init__()

        self.input_dim = input_dim  # src_vocab_sz
        self.enc_embed_dim = embed_dim
        self.enc_hidden_dim = hidden_dim
        self.enc_rnn_type = rnn_type
        self.enc_layers = layers
        self.enc_directions = 2 if bidirectional else 1
        self.device = device

        self.embedding = nn.Embedding(self.input_dim, self.enc_embed_dim)

        if self.enc_rnn_type == "gru":
            self.enc_rnn = nn.GRU(
                input_size=self.enc_embed_dim,
                hidden_size=self.enc_hidden_dim,
                num_layers=self.enc_layers,
                bidirectional=bidirectional,
            )
        elif self.enc_rnn_type == "lstm":
            self.enc_rnn = nn.LSTM(
                input_size=self.enc_embed_dim,
                hidden_size=self.enc_hidden_dim,
                num_layers=self.enc_layers,
                bidirectional=bidirectional,
            )
        else:
            raise Exception("unknown RNN type mentioned")

    def forward(self, x, x_sz, hidden=None):
        """
        x_sz: (batch_size, 1) - unpadded sequence lengths used for pack_pad
        Returns:
            output: (batch_size, max_length, hidden_dim)
            hidden: (n_layers * num_directions, batch_size, hidden_dim)
                    | if LSTM, a tuple (h_n, c_n)
        """
        batch_sz = x.shape[0]
        # x: (batch_size, max_length, enc_embed_dim)
        x = self.embedding(x)

        ## pack the padded data
        # x: (max_length, batch_size, enc_embed_dim) -> for pack_pad
        x = x.permute(1, 0, 2)
        x = nn.utils.rnn.pack_padded_sequence(x, x_sz, enforce_sorted=False)  # unpad

        # output: (packed_size, batch_size, enc_embed_dim) -> hidden from all timesteps
        # hidden: (n_layers * num_directions, batch_size, hidden_dim) | if LSTM (h_n, c_n)
        output, hidden = self.enc_rnn(x)

        ## pad the sequence back to the max length in the batch
        # output: (max_length, batch_size, enc_embed_dim * num_directions)
        output, _ = nn.utils.rnn.pad_packed_sequence(output)

        # output: (batch_size, max_length, hidden_dim)
        output = output.permute(1, 0, 2)

        return output, hidden
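
# A hedged shape sketch for the Encoder above, using toy dimensions that are
# illustrative only (a 30-symbol vocabulary, batch of 2). Defined but never
# called, so it costs nothing at import time.
def _encoder_shape_demo():
    enc = Encoder(input_dim=30, embed_dim=8, hidden_dim=16, bidirectional=True)
    x = torch.randint(0, 30, (2, 5))  # (batch_size, max_length) token ids
    x_sz = torch.tensor([5, 3])  # unpadded lengths; kept on CPU for pack_pad
    output, hidden = enc(x, x_sz)
    # bidirectional GRU -> output features are hidden_dim * 2 directions
    assert output.shape == (2, 5, 16 * 2)
    assert hidden.shape == (2, 2, 16)  # (layers * directions, batch, hidden)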
class Decoder(nn.Module):
    """
    Used as the decoder stage
    """

    def __init__(
        self,
        output_dim,
        embed_dim,
        hidden_dim,
        rnn_type="gru",
        layers=1,
        use_attention=True,
        enc_outstate_dim=None,  # enc_directions * enc_hidden_dim
        dropout=0,
        device="cpu",
    ):
        super(Decoder, self).__init__()

        self.output_dim = output_dim  # tgt_vocab_sz
        self.dec_hidden_dim = hidden_dim
        self.dec_embed_dim = embed_dim
        self.dec_rnn_type = rnn_type
        self.dec_layers = layers
        self.use_attention = use_attention
        self.device = device
        if self.use_attention:
            self.enc_outstate_dim = enc_outstate_dim if enc_outstate_dim else hidden_dim
        else:
            self.enc_outstate_dim = 0

        self.embedding = nn.Embedding(self.output_dim, self.dec_embed_dim)

        if self.dec_rnn_type == "gru":
            self.dec_rnn = nn.GRU(
                input_size=self.dec_embed_dim
                + self.enc_outstate_dim,  # to concat attention_output
                hidden_size=self.dec_hidden_dim,  # previous hidden
                num_layers=self.dec_layers,
                batch_first=True,
            )
        elif self.dec_rnn_type == "lstm":
            self.dec_rnn = nn.LSTM(
                input_size=self.dec_embed_dim
                + self.enc_outstate_dim,  # to concat attention_output
                hidden_size=self.dec_hidden_dim,  # previous hidden
                num_layers=self.dec_layers,
                batch_first=True,
            )
        else:
            raise Exception("unknown RNN type mentioned")

        self.fc = nn.Sequential(
            nn.Linear(self.dec_hidden_dim, self.dec_embed_dim),
            nn.LeakyReLU(),
            # nn.Linear(self.dec_embed_dim, self.dec_embed_dim),
            # nn.LeakyReLU(),  # removed to reduce model size
            nn.Linear(self.dec_embed_dim, self.output_dim),
        )

        ## ----- Attention ----------
        if self.use_attention:
            self.W1 = nn.Linear(self.enc_outstate_dim, self.dec_hidden_dim)
            self.W2 = nn.Linear(self.dec_hidden_dim, self.dec_hidden_dim)
            self.V = nn.Linear(self.dec_hidden_dim, 1)

    def attention(self, x, hidden, enc_output):
        """
        x: (batch_size, 1, dec_embed_dim) -> after embedding
        enc_output: (batch_size, max_length, enc_hidden_dim * num_directions)
        hidden: (n_layers, batch_size, hidden_size) | if LSTM (h_n, c_n)
        """
        ## perform addition to calculate the score

        # hidden_with_time_axis: (batch_size, 1, hidden_dim)
        ## hidden_with_time_axis = hidden.permute(1, 0, 2)  ## replaced with the 2 lines below
        hidden_with_time_axis = torch.sum(hidden, axis=0)
        hidden_with_time_axis = hidden_with_time_axis.unsqueeze(1)

        # score: (batch_size, max_length, hidden_dim)
        score = torch.tanh(self.W1(enc_output) + self.W2(hidden_with_time_axis))

        # attention_weights: (batch_size, max_length, 1)
        # we get 1 at the last axis because we apply score to self.V
        attention_weights = torch.softmax(self.V(score), dim=1)

        # context_vector shape after sum: (batch_size, hidden_dim)
        context_vector = attention_weights * enc_output
        context_vector = torch.sum(context_vector, dim=1)
        # context_vector: (batch_size, 1, hidden_dim)
        context_vector = context_vector.unsqueeze(1)

        # attend_out: (batch_size, 1, dec_embed_dim + hidden_size)
        attend_out = torch.cat((context_vector, x), -1)

        return attend_out, attention_weights

    def forward(self, x, hidden, enc_output):
        """
        x: (batch_size, 1)
        enc_output: (batch_size, max_length, dec_embed_dim)
        hidden: (n_layers, batch_size, hidden_size) | if LSTM (h_n, c_n)
        """
        if (hidden is None) and (self.use_attention is False):
            raise Exception("No use of a decoder with no attention and no hidden state")

        batch_sz = x.shape[0]

        if hidden is None:
            # hidden: (n_layers, batch_size, hidden_dim)
            hid_for_att = torch.zeros(
                (self.dec_layers, batch_sz, self.dec_hidden_dim)
            ).to(self.device)
        elif self.dec_rnn_type == "lstm":
            hid_for_att = hidden[0]  # h_n
        else:
            hid_for_att = hidden

        # x: (batch_size, 1, dec_embed_dim) -> after embedding
        x = self.embedding(x)

        if self.use_attention:
            # x: (batch_size, 1, dec_embed_dim + hidden_size) -> after attention
            # aw: (batch_size, max_length, 1)
            x, aw = self.attention(x, hid_for_att, enc_output)
        else:
            x, aw = x, 0

        # passing the concatenated vector to the RNN
        # output: (batch_size, n_layers, hidden_size)
        # hidden: (n_layers, batch_size, hidden_size) | if LSTM (h_n, c_n)
        output, hidden = (
            self.dec_rnn(x, hidden) if hidden is not None else self.dec_rnn(x)
        )

        # output: (batch_size * 1, hidden_size)
        output = output.view(-1, output.size(2))

        # output: (batch_size * 1, output_dim)
        output = self.fc(output)

        return output, hidden, aw
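
# A hedged single-step shape sketch for the Decoder above, matching the toy
# Encoder dims from the previous demo (enc_outstate_dim = 16 hidden * 2
# directions). Illustrative only; defined but never called.
def _decoder_step_demo():
    dec = Decoder(output_dim=30, embed_dim=8, hidden_dim=16, enc_outstate_dim=32)
    enc_output = torch.zeros(2, 5, 32)  # (batch, max_length, enc_outstate_dim)
    x = torch.zeros(2, 1, dtype=torch.long)  # previous token ids
    out, hidden, aw = dec(x, None, enc_output)  # hidden=None -> zeros for attention
    assert out.shape == (2, 30)  # per-step vocabulary scores
    assert aw.shape == (2, 5, 1)  # attention over the 5 source positions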
class Seq2Seq(nn.Module):
    """
    Constructs the seq2seq architecture from encoder and decoder objects
    """

    def __init__(
        self, encoder, decoder, pass_enc2dec_hid=False, dropout=0, device="cpu"
    ):
        super(Seq2Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.pass_enc2dec_hid = pass_enc2dec_hid

        if self.pass_enc2dec_hid:
            assert (
                decoder.dec_hidden_dim == encoder.enc_hidden_dim
            ), "Hidden dimensions of encoder and decoder must be the same, or unset `pass_enc2dec_hid`"
        if decoder.use_attention:
            assert (
                decoder.enc_outstate_dim
                == encoder.enc_directions * encoder.enc_hidden_dim
            ), "Set `enc_outstate_dim` correctly in decoder"
        assert (
            self.pass_enc2dec_hid or decoder.use_attention
        ), "No use of a decoder with no attention and no hidden state from the encoder"

    def forward(self, src, tgt, src_sz, teacher_forcing_ratio=0):
        """
        src: (batch_size, sequence_len.padded)
        tgt: (batch_size, sequence_len.padded)
        src_sz: [batch_size, 1] - unpadded sequence lengths
        """
        batch_size = tgt.shape[0]

        # enc_output: (batch_size, padded_seq_length, enc_hidden_dim * num_directions)
        # enc_hidden: (enc_layers * num_directions, batch_size, hidden_dim)
        enc_output, enc_hidden = self.encoder(src, src_sz)

        if self.pass_enc2dec_hid:
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            dec_hidden = enc_hidden
        else:
            # dec_hidden -> will be initialized to zeros internally
            dec_hidden = None

        # pred_vecs: (batch_size, output_dim, sequence_sz) -> shape required for CELoss
        pred_vecs = torch.zeros(batch_size, self.decoder.output_dim, tgt.size(1)).to(
            self.device
        )

        # dec_input: (batch_size, 1)
        dec_input = tgt[:, 0].unsqueeze(1)  # initialize to start token
        pred_vecs[:, 1, 0] = 1  # mark the start token (index 1, "$") for all batches at t=0
        for t in range(1, tgt.size(1)):
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            # dec_output: (batch_size, output_dim)
            # dec_input: (batch_size, 1)
            dec_output, dec_hidden, _ = self.decoder(
                dec_input,
                dec_hidden,
                enc_output,
            )
            pred_vecs[:, :, t] = dec_output

            # prediction: (batch_size)
            prediction = torch.argmax(dec_output, dim=1)

            # Teacher Forcing
            if random.random() < teacher_forcing_ratio:
                dec_input = tgt[:, t].unsqueeze(1)
            else:
                dec_input = prediction.unsqueeze(1)

        return pred_vecs  # (batch_size, output_dim, sequence_sz)

    def inference(self, src, max_tgt_sz=50, debug=0):
        """
        Single input only, no batch inferencing
        src: (sequence_len)
        debug: if True, also returns attention weights
        """
        batch_size = 1
        start_tok = src[0]
        end_tok = src[-1]
        src_sz = torch.tensor([len(src)])
        src_ = src.unsqueeze(0)

        # enc_output: (batch_size, padded_seq_length, enc_hidden_dim * num_directions)
        # enc_hidden: (enc_layers * num_directions, batch_size, hidden_dim)
        enc_output, enc_hidden = self.encoder(src_, src_sz)

        if self.pass_enc2dec_hid:
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            dec_hidden = enc_hidden
        else:
            # dec_hidden -> will be initialized to zeros internally
            dec_hidden = None

        # pred_arr: (sequence_sz, 1) -> shape required for CELoss
        pred_arr = torch.zeros(max_tgt_sz, 1).to(self.device)
        if debug:
            attend_weight_arr = torch.zeros(max_tgt_sz, len(src)).to(self.device)

        # dec_input: (batch_size, 1)
        dec_input = start_tok.view(1, 1)  # initialize to start token
        pred_arr[0] = start_tok.view(1, 1)  # initialize to start token
        for t in range(max_tgt_sz):
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            # dec_output: (batch_size, output_dim)
            # dec_input: (batch_size, 1)
            dec_output, dec_hidden, aw = self.decoder(
                dec_input,
                dec_hidden,
                enc_output,
            )
            # prediction: (1, 1)
            prediction = torch.argmax(dec_output, dim=1)
            dec_input = prediction.unsqueeze(1)
            pred_arr[t] = prediction
            if debug:
                attend_weight_arr[t] = aw.squeeze(-1)

            if torch.eq(prediction, end_tok):
                break

        if debug:
            return pred_arr.squeeze(), attend_weight_arr

        # pred_arr: (sequence_len)
        return pred_arr.squeeze().to(dtype=torch.long)

    def active_beam_inference(self, src, beam_width=3, max_tgt_sz=50):
        """Active beam-search-based decoding
        src: (sequence_len)
        """

        def _avg_score(p_tup):
            """Used for sorting
            TODO: divide by sequence length raised to a power alpha, as a hyperparameter
            """
            return p_tup[0]

        batch_size = 1
        start_tok = src[0]
        end_tok = src[-1]
        src_sz = torch.tensor([len(src)])
        src_ = src.unsqueeze(0)

        # enc_output: (batch_size, padded_seq_length, enc_hidden_dim * num_directions)
        # enc_hidden: (enc_layers * num_directions, batch_size, hidden_dim)
        enc_output, enc_hidden = self.encoder(src_, src_sz)

        if self.pass_enc2dec_hid:
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            init_dec_hidden = enc_hidden
        else:
            # dec_hidden -> will be initialized to zeros internally
            init_dec_hidden = None

        # top_pred[][0] = Σ log_softmax (running log-probability of the sequence)
        # top_pred[][1] = sequence torch.tensor, shape: (1)
        # top_pred[][2] = dec_hidden
        top_pred_list = [(0, start_tok.unsqueeze(0), init_dec_hidden)]

        for t in range(max_tgt_sz):
            cur_pred_list = []

            for p_tup in top_pred_list:
                if p_tup[1][-1] == end_tok:
                    cur_pred_list.append(p_tup)
                    continue

                # dec_hidden: (dec_layers, 1, hidden_dim)
                # dec_output: (1, output_dim)
                dec_output, dec_hidden, _ = self.decoder(
                    x=p_tup[1][-1].view(1, 1),  # dec_input: (1, 1)
                    hidden=p_tup[2],
                    enc_output=enc_output,
                )

                ## Π{prob} becomes Σ{log(prob)} -> prevents numerical underflow of the product
                # dec_output: (1, output_dim)
                dec_output = nn.functional.log_softmax(dec_output, dim=1)
                # pred_topk.values & pred_topk.indices: (1, beam_width)
                pred_topk = torch.topk(dec_output, k=beam_width, dim=1)

                for i in range(beam_width):
                    sig_logsmx_ = p_tup[0] + pred_topk.values[0][i]
                    # seq_tensor_: (seq_len)
                    seq_tensor_ = torch.cat((p_tup[1], pred_topk.indices[0][i].view(1)))

                    cur_pred_list.append((sig_logsmx_, seq_tensor_, dec_hidden))

            cur_pred_list.sort(key=_avg_score, reverse=True)  # maximized order
            top_pred_list = cur_pred_list[:beam_width]

            # check whether all topk sequences have reached end_tok
            end_flags_ = [1 if t[1][-1] == end_tok else 0 for t in top_pred_list]
            if beam_width == sum(end_flags_):
                break

        pred_tnsr_list = [t[1] for t in top_pred_list]

        return pred_tnsr_list

    def passive_beam_inference(self, src, beam_width=7, max_tgt_sz=50):
        """
        Passive beam-search-based inference
        src: (sequence_len)
        """

        def _avg_score(p_tup):
            """Used for sorting
            TODO: divide by sequence length raised to a power alpha, as a hyperparameter
            """
            return p_tup[0]

        def _beam_search_topk(topk_obj, start_tok, beam_width):
            """Search for the sequences with maximum probability
            topk_obj[x]: .values & .indices, shape: (1, beam_width)
            """
            # top_pred_list[x]: tuple(prob, seq_tensor)
            top_pred_list = [
                (0, start_tok.unsqueeze(0)),
            ]

            for obj in topk_obj:
                new_lst_ = list()
                for itm in top_pred_list:
                    for i in range(beam_width):
                        sig_logsmx_ = itm[0] + obj.values[0][i]
                        seq_tensor_ = torch.cat((itm[1], obj.indices[0][i].view(1)))
                        new_lst_.append((sig_logsmx_, seq_tensor_))

                new_lst_.sort(key=_avg_score, reverse=True)
                top_pred_list = new_lst_[:beam_width]
            return top_pred_list

        batch_size = 1
        start_tok = src[0]
        end_tok = src[-1]
        src_sz = torch.tensor([len(src)])
        src_ = src.unsqueeze(0)

        enc_output, enc_hidden = self.encoder(src_, src_sz)

        if self.pass_enc2dec_hid:
            # dec_hidden: (dec_layers, batch_size, dec_hidden_dim)
            dec_hidden = enc_hidden
        else:
            # dec_hidden -> will be initialized to zeros internally
            dec_hidden = None

        # dec_input: (1, 1)
        dec_input = start_tok.view(1, 1)  # initialize to start token

        topk_obj = []
        for t in range(max_tgt_sz):
            dec_output, dec_hidden, aw = self.decoder(
                dec_input,
                dec_hidden,
                enc_output,
            )

            ## Π{prob} becomes Σ{log(prob)} -> prevents numerical underflow of the product
            # dec_output: (1, output_dim)
            dec_output = nn.functional.log_softmax(dec_output, dim=1)
            # pred_topk.values & pred_topk.indices: (1, beam_width)
            pred_topk = torch.topk(dec_output, k=beam_width, dim=1)

            topk_obj.append(pred_topk)

            # dec_input: (1, 1)
            dec_input = pred_topk.indices[0][0].view(1, 1)
            if torch.eq(dec_input, end_tok):
                break

        top_pred_list = _beam_search_topk(topk_obj, start_tok, beam_width)
        pred_tnsr_list = [t[1] for t in top_pred_list]

        return pred_tnsr_list
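
# A hedged end-to-end sketch wiring together the toy Encoder/Decoder dims from
# the demos above. The weights are untrained, so outputs are arbitrary but
# well-shaped. Defined but never called; illustrative only.
def _seq2seq_inference_demo():
    enc = Encoder(input_dim=30, embed_dim=8, hidden_dim=16, bidirectional=True)
    dec = Decoder(output_dim=30, embed_dim=8, hidden_dim=16, enc_outstate_dim=32)
    model = Seq2Seq(enc, dec)  # attention only: no enc->dec hidden handoff
    src = torch.tensor([1, 5, 6, 7, 2])  # "$ ... #" framing, as in GlyphStrawboss
    greedy = model.inference(src, max_tgt_sz=10)  # 1-D tensor of token ids
    beams = model.active_beam_inference(src, beam_width=3, max_tgt_sz=10)
    assert greedy.dim() == 1 and len(beams) == 3  # 3 ranked candidate sequences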
class GlyphStrawboss:
    def __init__(self, glyphs="en"):
        """List of letters of a language in Unicode
        lang: list with Unicode codepoints
        """
        if glyphs == "en":
            # lowercase alone
            self.glyphs = [chr(alpha) for alpha in range(97, 123)] + ["é", "è", "á"]
        else:
            self.dossier = json.load(open(glyphs, encoding="utf-8"))
            self.numsym_map = self.dossier["numsym_map"]
            self.glyphs = self.dossier["glyphs"]

        self.indoarab_num = [chr(alpha) for alpha in range(48, 58)]
        self.char2idx = {}
        self.idx2char = {}
        self._create_index()

    def _create_index(self):
        self.char2idx["_"] = 0  # pad
        self.char2idx["$"] = 1  # start
        self.char2idx["#"] = 2  # end
        self.char2idx["*"] = 3  # mask
        self.char2idx["'"] = 4  # apostrophe U+0027
        self.char2idx["%"] = 5  # unused
        self.char2idx["!"] = 6  # unused
        self.char2idx["?"] = 7
        self.char2idx[":"] = 8
        self.char2idx[" "] = 9
        self.char2idx["-"] = 10
        self.char2idx[","] = 11
        self.char2idx["."] = 12
        self.char2idx["("] = 13
        self.char2idx[")"] = 14
        self.char2idx["/"] = 15
        self.char2idx["^"] = 16

        for idx, char in enumerate(self.indoarab_num):
            self.char2idx[char] = idx + 17

        # letter-to-index mapping
        for idx, char in enumerate(self.glyphs):
            self.char2idx[char] = idx + 27  # 27 reserved indices precede: 17 special tokens + 10 digits

        # index-to-letter mapping
        for char, idx in self.char2idx.items():
            self.idx2char[idx] = char

    def size(self):
        return len(self.char2idx)

    def word2xlitvec(self, word):
        """Converts a given string of glyphs (word) to a vector (numpy)
        Also adds tokens for start and end
        """
        try:
            vec = [self.char2idx["$"]]  # start token
            for i in list(word):
                vec.append(self.char2idx[i])
            vec.append(self.char2idx["#"])  # end token

            vec = np.asarray(vec, dtype=np.int64)
            return vec

        except Exception as error:
            print("Error in word:", word, "Error: char not in tokens:", error)
            sys.exit()

    def xlitvec2word(self, vector):
        """Converts a vector (numpy) to a string of glyphs (word)"""
        char_list = []
        for i in vector:
            char_list.append(self.idx2char[i])

        word = "".join(char_list).replace("$", "").replace("#", "")  # remove tokens
        word = word.replace("_", "").replace("*", "")  # remove tokens
        return word
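
# A hedged round-trip sketch for GlyphStrawboss above, using the built-in
# English glyph set (no config file needed). Defined but never called.
def _glyph_roundtrip_demo():
    gb = GlyphStrawboss("en")
    vec = gb.word2xlitvec("hello")  # prepends "$" (idx 1) and appends "#" (idx 2)
    assert vec[0] == 1 and vec[-1] == 2
    assert gb.xlitvec2word(vec) == "hello"  # special tokens stripped on the way back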
class XlitPiston:
    """
    Handles prediction & post-processing of transliteration for a single language
    Class dependencies: Seq2Seq, GlyphStrawboss
    Global variables: F_DIR
    """

    def __init__(
        self, weight_path, tglyph_cfg_file, iglyph_cfg_file="en", device="cpu"
    ):
        self.device = device
        self.in_glyph_obj = GlyphStrawboss(iglyph_cfg_file)
        self.tgt_glyph_obj = GlyphStrawboss(glyphs=tglyph_cfg_file)
        self._numsym_set = set(
            json.load(open(tglyph_cfg_file, encoding="utf-8"))["numsym_map"].keys()
        )
        self._inchar_set = set("abcdefghijklmnopqrstuvwxyzéèá")
        self._natscr_set = set().union(
            self.tgt_glyph_obj.glyphs, sum(self.tgt_glyph_obj.numsym_map.values(), [])
        )

        ## Static model config  TODO: add support for defining these in JSON
        input_dim = self.in_glyph_obj.size()
        output_dim = self.tgt_glyph_obj.size()
        enc_emb_dim = 300
        dec_emb_dim = 300
        enc_hidden_dim = 512
        dec_hidden_dim = 512
        rnn_type = "lstm"
        enc2dec_hid = True
        attention = True
        enc_layers = 1
        dec_layers = 2
        m_dropout = 0
        enc_bidirect = True
        enc_outstate_dim = enc_hidden_dim * (2 if enc_bidirect else 1)

        enc = Encoder(
            input_dim=input_dim,
            embed_dim=enc_emb_dim,
            hidden_dim=enc_hidden_dim,
            rnn_type=rnn_type,
            layers=enc_layers,
            dropout=m_dropout,
            device=self.device,
            bidirectional=enc_bidirect,
        )
        dec = Decoder(
            output_dim=output_dim,
            embed_dim=dec_emb_dim,
            hidden_dim=dec_hidden_dim,
            rnn_type=rnn_type,
            layers=dec_layers,
            dropout=m_dropout,
            use_attention=attention,
            enc_outstate_dim=enc_outstate_dim,
            device=self.device,
        )
        self.model = Seq2Seq(enc, dec, pass_enc2dec_hid=enc2dec_hid, device=self.device)
        self.model = self.model.to(self.device)
        weights = torch.load(weight_path, map_location=torch.device(self.device))

        self.model.load_state_dict(weights)
        self.model.eval()

    def character_model(self, word, beam_width=1):
        in_vec = torch.from_numpy(self.in_glyph_obj.word2xlitvec(word)).to(self.device)
        ## switch here between active_beam_inference and passive_beam_inference
        p_out_list = self.model.active_beam_inference(in_vec, beam_width=beam_width)
        result = [
            self.tgt_glyph_obj.xlitvec2word(out.cpu().numpy()) for out in p_out_list
        ]  # list type
        return result

    def numsym_model(self, seg):
        """tgt_glyph_obj.numsym_map[x] returns a list object"""
        if len(seg) == 1:
            return [seg] + self.tgt_glyph_obj.numsym_map[seg]

        a = [self.tgt_glyph_obj.numsym_map[n][0] for n in seg]
        return [seg] + ["".join(a)]

    def _word_segementer(self, sequence):

        sequence = sequence.lower()
        accepted = set().union(self._numsym_set, self._inchar_set, self._natscr_set)
        # sequence = ''.join([i for i in sequence if i in accepted])

        segment = []
        seq_ = list(sequence)
        while len(seq_):
            # for number-symbol chars
            temp = ""
            while len(seq_) and seq_[0] in self._numsym_set:
                temp += seq_[0]
                seq_.pop(0)
            if temp != "":
                segment.append(temp)

            # for target-script chars
            temp = ""
            while len(seq_) and seq_[0] in self._natscr_set:
                temp += seq_[0]
                seq_.pop(0)
            if temp != "":
                segment.append(temp)

            # for input Roman chars
            temp = ""
            while len(seq_) and seq_[0] in self._inchar_set:
                temp += seq_[0]
                seq_.pop(0)
            if temp != "":
                segment.append(temp)

            # for all other (unaccepted) chars
            temp = ""
            while len(seq_) and seq_[0] not in accepted:
                temp += seq_[0]
                seq_.pop(0)
            if temp != "":
                segment.append(temp)

        return segment

    def inferencer(self, sequence, beam_width=10):

        seg = self._word_segementer(sequence[:120])
        lit_seg = []

        p = 0
        while p < len(seg):
            if seg[p][0] in self._natscr_set:
                lit_seg.append([seg[p]])
                p += 1

            elif seg[p][0] in self._inchar_set:
                lit_seg.append(self.character_model(seg[p], beam_width=beam_width))
                p += 1

            elif seg[p][0] in self._numsym_set:  # num & punc
                lit_seg.append(self.numsym_model(seg[p]))
                p += 1
            else:
                lit_seg.append([seg[p]])
                p += 1

        ## IF there are 2 or fewer segments, return the combinatorial results;
        ## ELSE return only the concatenation of each segment's top-1 result
        if len(lit_seg) == 1:
            final_result = lit_seg[0]

        elif len(lit_seg) == 2:
            final_result = [""]
            for seg in lit_seg:
                new_result = []
                for s in seg:
                    for f in final_result:
                        new_result.append(f + s)
                final_result = new_result

        else:
            new_result = []
            for seg in lit_seg:
                new_result.append(seg[0])
            final_result = ["".join(new_result)]

        return final_result
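
# A hedged worked example of the two-segment combination rule in
# XlitPiston.inferencer above: with exactly two segments, every pairing of
# candidates is emitted. The candidate strings are illustrative only.
def _combination_rule_demo():
    lit_seg = [["ka", "kaa"], ["la", "laa"]]  # two segments, two candidates each
    final_result = [""]
    for seg in lit_seg:
        final_result = [f + s for s in seg for f in final_result]
    assert sorted(final_result) == ["kaala", "kaalaa", "kala", "kalaa"]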
class XlitEngine:
    """
    Manages the top-level tasks and applications of transliteration
    Global variables: F_DIR
    """

    def __init__(self, lang2use="hi", config_path="models/default_lineup.json"):

        lineup = json.load(open(os.path.join(F_DIR, config_path), encoding="utf-8"))
        models_path = os.path.join(F_DIR, "models")
        self.lang_config = {}
        if lang2use in lineup:
            self.lang_config[lang2use] = lineup[lang2use]
        else:
            raise Exception(
                "XlitError: The entered language code was not found. Available are {}".format(
                    lineup.keys()
                )
            )

        self.langs = {}
        self.lang_model = {}
        for la in self.lang_config:
            try:
                print("Loading {}...".format(la))
                self.lang_model[la] = XlitPiston(
                    weight_path=os.path.join(
                        models_path, self.lang_config[la]["weight"]
                    ),
                    tglyph_cfg_file=os.path.join(
                        models_path, self.lang_config[la]["script"]
                    ),
                    iglyph_cfg_file="en",
                )
                self.langs[la] = self.lang_config[la]["name"]
            except Exception as error:
                print("XlitError: Failure in loading {} \n".format(la), error)
                print(XlitError.loading_err.value)

    def translit_word(self, eng_word, lang_code="hi", topk=7, beam_width=10):
        if eng_word == "":
            return []

        if lang_code in self.langs:
            try:
                res_list = self.lang_model[lang_code].inferencer(
                    eng_word, beam_width=beam_width
                )
                return res_list[:topk]

            except Exception as error:
                print("XlitError:", traceback.format_exc())
                print(XlitError.internal_err.value)
                return XlitError.internal_err
        else:
            print("XlitError: Unknown language requested", lang_code)
            print(XlitError.lang_err.value)
            return XlitError.lang_err

    def translit_sentence(self, eng_sentence, lang_code="hi", beam_width=10):
        if eng_sentence == "":
            return []

        if lang_code in self.langs:
            try:
                out_str = ""
                for word in eng_sentence.split():
                    res_ = self.lang_model[lang_code].inferencer(
                        word, beam_width=beam_width
                    )
                    out_str = out_str + res_[0] + " "
                return out_str[:-1]

            except Exception as error:
                print("XlitError:", traceback.format_exc())
                print(XlitError.internal_err.value)
                return XlitError.internal_err

        else:
            print("XlitError: Unknown language requested", lang_code)
            print(XlitError.lang_err.value)
            return XlitError.lang_err
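
# A hedged caller-side sketch: translit_word returns a list of candidates on
# success but an XlitError member on failure, so callers may want to branch on
# the return type. The helper name here is illustrative only.
def _safe_translit(engine, word, lang="hi"):
    res = engine.translit_word(word, lang_code=lang)
    return res if isinstance(res, list) else []  # swallow XlitError members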
if __name__ == "__main__":

    engine = XlitEngine()
    y = engine.translit_sentence("Hello World !")
    print(y)
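    # A further hedged usage sketch: word-level top-k candidates for the
    # default "hi" model (assumes the model weights are present on disk).
    print(engine.translit_word("namaste", topk=5))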