from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
fr_text = "j'aime coder, j'espère que vous aussi."
en_text = "I love coding, I hope you do too."
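# Load the multilingual M2M100 (418M) model and its tokenizer once at module level,
# so both translation helpers below can reuse them.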
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
def frenchtoenglish(text):
    # translate French to English
    tokenizer.src_lang = "fr"
    encoded_fr = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded_fr, forced_bos_token_id=tokenizer.get_lang_id("en"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
def englishtofrench(text):
    # translate English to French
    tokenizer.src_lang = "en"
    encoded_en = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.get_lang_id("fr"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
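# Minimal usage sketch (an assumption, not part of the original file): calls the two
# helpers on the sample sentences defined above. Requires transformers and sentencepiece;
# the first run downloads the facebook/m2m100_418M weights.
if __name__ == "__main__":
    print(frenchtoenglish(fr_text))  # English rendering of the French sample
    print(englishtofrench(en_text))  # French rendering of the English sample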