File size: 1,575 Bytes
b774a38
a7233a3
669e6fa
1fd5f9d
a7233a3
 
 
 
b774a38
a7233a3
8e7bfe1
a7233a3
 
 
 
 
8e7bfe1
a7233a3
 
54177cb
 
a7233a3
 
 
 
 
 
54177cb
 
a7233a3
54177cb
b774a38
 
54177cb
b774a38
 
 
 
54177cb
 
b774a38
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import sys
sys.path = ['/code/nmt-en-tr/datasets', '/code/nmt-en-tr/models'] + sys.path

import tensorflow_text as tftxt
import tensorflow as tf
from official.core import exp_factory, task_factory
from official.nlp.configs import wmt_transformer_experiments as wmt_te

# SentencePiece tokenizer shared by both translation directions; add_eos=True
# appends the end-of-sequence token the Transformer decoder expects.
tokenizer= tftxt.SentencepieceTokenizer(
          model=tf.io.gfile.GFile("/code/pretrained_v2/sentencepiece_en_tr.model", "rb").read(),
          add_eos=True)

# sys.path = ['/root/.local/lib/python3.10/site-packages', '/root/.local/bin'] + sys.path

# Pull the experiment config registered under this name (presumably registered
# by the wmt_transformer_experiments import above — TODO confirm) and point it
# at the local SentencePiece model.
task_config = exp_factory.get_exp_config('transformer_tr_en_blended/base').task
task_config.sentencepiece_model_path = '/code/pretrained_v2/sentencepiece_en_tr.model'

# Build the untrained model graph; weights are loaded later (after a warm-up
# call materializes the variables).
translation_task = task_factory.get_task(task_config)
# model_en_tr = translation_task.build_model()
model_tr_en = translation_task.build_model() # we can use the same task

def translate(input_text, model):
  """Translate one string with `model` and return the decoded text.

  Uses the module-level `tokenizer`. NOTE(review): assumes the model's
  output dict carries token ids under the 'outputs' key — confirm
  against the Model Garden translation task.
  """
  token_ids = tokenizer.tokenize(input_text)
  batched = tf.reshape(token_ids, [1, -1])
  prediction = model({'inputs' : batched})
  text = tokenizer.detokenize(prediction['outputs'])
  return text.numpy()[0].decode('utf-8')

# ignore = translate("test", model_en_tr)
# Warm-up call: runs the model once so all variables are created before the
# checkpoint is restored below (order matters — do not move this after load).
ignore = translate("test", model_tr_en)

# Restore pretrained Turkish->English weights into the now-built model.
model_tr_en.load_weights("/code/pretrained_v2/tr_en/tr_en")

def predict(inp):
    """Gradio callback: translate Turkish input text to English."""
    output = translate(inp, model_tr_en)
    return output

def run():
    """Build the Gradio UI and serve it on 0.0.0.0:7860.

    Fix: `gr.inputs.Textbox` / `gr.outputs.Textbox` are the legacy
    Gradio 2.x namespaces, deprecated in 3.x and removed in 4.x;
    components are now passed directly to `gr.Interface`.
    """
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(label="Turkish"),
        outputs=gr.Textbox(label="English"),
    )

    # Bind to all interfaces so the server is reachable from outside the
    # container; port 7860 is the conventional Gradio/Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)


# Script entry point: the model setup above has already run at import time;
# this only starts the web server.
if __name__ == "__main__":
    run()