import sys

# Put the repo's local packages (datasets and custom model code) ahead of the
# installed ones so the imports below resolve to them.
sys.path = ['/home/user/app/nmt-en-tr/datasets', '/home/user/app/nmt-en-tr/models'] + sys.path

import gradio as gr
import tensorflow as tf
import tensorflow_text as tftxt
from official.core import exp_factory, task_factory
# Imported for its side effect of registering the transformer experiment
# configs (including the one used below) with exp_factory.
from official.nlp.configs import wmt_transformer_experiments as wmt_te  # noqa: F401

# SentencePiece tokenizer shared by both translation directions; add_eos=True
# appends the end-of-sentence id the model expects.
tokenizer = tftxt.SentencepieceTokenizer(
    model=tf.io.gfile.GFile(
        "/home/user/app/pretrained_v2/sentencepiece_en_tr.model", "rb").read(),
    add_eos=True)
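# Illustrative round trip (exact ids depend on the SentencePiece vocab):
#   ids = tokenizer.tokenize("hello")                # int32 ids ending in EOS
#   tokenizer.detokenize(tf.reshape(ids, [1, -1]))   # -> [b"hello"]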


# Build the translation task from the registered experiment config, pointing
# it at the bundled SentencePiece model.
task_config = exp_factory.get_exp_config('transformer_tr_en_blended/base').task
task_config.sentencepiece_model_path = 'pretrained_v2/sentencepiece_en_tr.model'

translation_task = task_factory.get_task(task_config)
model_en_tr = translation_task.build_model()
# model_tr_en = translation_task.build_model()  # the same task can build the reverse model

def translate(input_text, model):
    """Tokenize a sentence, run the model, and detokenize the result."""
    tokenized = tokenizer.tokenize(input_text)
    translated = model({'inputs': tf.reshape(tokenized, [1, -1])})
    return tokenizer.detokenize(translated['outputs']).numpy()[0].decode('utf-8')

# Warm-up call: the Keras model creates its variables lazily on the first
# forward pass, so this makes load_weights below restore into existing
# variables instead of being deferred.
ignore = translate("test", model_en_tr)
# ignore = translate("test", model_tr_en)

# Restore the pretrained en->tr checkpoint.
model_en_tr.load_weights("pretrained_v2/en_tr/en_tr")
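# Hypothetical sketch of the reverse direction hinted at by the commented-out
# lines above; the tr->en checkpoint path is an assumption, not part of this
# script:
# model_tr_en = translation_task.build_model()
# ignore = translate("test", model_tr_en)  # build variables before restoring
# model_tr_en.load_weights("pretrained_v2/tr_en/tr_en")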

def predict(inp):
    """Gradio handler: English text in, Turkish translation out."""
    return translate(inp, model_en_tr)
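# Example usage (the Turkish output is illustrative, not a recorded result):
#   predict("Good morning")  # -> e.g. "Günaydın"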

def run():
    # gr.inputs/gr.outputs were deprecated in Gradio 3 and removed in Gradio 4;
    # gr.Textbox is the current component API.
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(label="English"),
        outputs=gr.Textbox(label="Turkish"),
    )

    # Bind to all interfaces on port 7860, the convention for Hugging Face
    # Spaces containers.
    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run()