File size: 1,039 Bytes
5dc48e3
e623ffc
b12d2a9
5dc48e3
3a74906
 
 
 
 
 
 
b12d2a9
 
5dc48e3
 
3a74906
 
5dc48e3
3a74906
5dc48e3
3a74906
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import streamlit as st
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Hugging Face Hub identifiers of the checkpoints this app loads.
TRANSLATION_MODEL_NAME = "facebook/mbart-large-50-one-to-many-mmt"
MASKED_LM_MODEL_NAME = "alabnii/jmedroberta-base-sentencepiece"


@st.cache_resource
def _load_translation():
    """Load the mBART-50 translation model and tokenizer once per process.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the checkpoint would be re-loaded each
    time the user submits text.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is switched to eval mode
        and the tokenizer is configured with ``src_lang="en_XX"``.
    """
    model = MBartForConditionalGeneration.from_pretrained(TRANSLATION_MODEL_NAME)
    model.eval()
    tokenizer = MBart50TokenizerFast.from_pretrained(
        TRANSLATION_MODEL_NAME, src_lang="en_XX"
    )
    return model, tokenizer


@st.cache_resource
def _load_masked_lm():
    """Load the masked-LM model and tokenizer once per process.

    Returns:
        A ``(model, tokenizer)`` tuple for ``MASKED_LM_MODEL_NAME``.
    """
    model = AutoModelForMaskedLM.from_pretrained(MASKED_LM_MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MASKED_LM_MODEL_NAME)
    return model, tokenizer


# Load the models and tokenizers (served from Streamlit's resource cache on reruns).
model_translation, tokenizer_translation = _load_translation()
# NOTE(review): the masked-LM pair is not used anywhere below; it is still
# loaded so the module-level names stay available -- confirm whether it is
# needed, since it costs start-up time and memory.
model_masked_lm, tokenizer_masked_lm = _load_masked_lm()

text = st.text_area('Enter the text:')

# Ignore empty and whitespace-only input so no generation is run on nothing.
if text and text.strip():
    model_inputs = tokenizer_translation(text, return_tensors="pt")
    generated_tokens = model_translation.generate(
        **model_inputs,
        # Force the decoder to start with the Hindi language code so the
        # output is produced in Hindi (source language is set to en_XX above).
        forced_bos_token_id=tokenizer_translation.lang_code_to_id["hi_IN"],
    )
    # batch_decode returns a list with one string per generated sequence.
    translation = tokenizer_translation.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    st.json(translation)