|
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer |
|
|
|
# Sample sentences for trying out the two translation helpers below.
# NOTE(review): "j'éspère" looks like a misspelling of "j'espère"; left
# untouched because it is runtime data passed to the model.
fr_text = "j'aime coder, j'éspère que vous aussi."

en_text = "i love code, i hope you too."
|
|
|
# Load the multilingual M2M100 (418M-parameter) seq2seq model and its
# tokenizer once at import time; both translation helpers below share
# these module-level objects.
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
|
|
|
def frenchtoenglish(input):
    """Translate a French string to English with the shared M2M100 model.

    Args:
        input: French source text. NOTE(review): the parameter name
            shadows the ``input`` builtin; kept unchanged so existing
            keyword callers keep working.

    Returns:
        The English translation as a single string (first decoded beam).
    """
    # Tell the tokenizer the source language before encoding.
    tokenizer.src_lang = "fr"
    batch = tokenizer(input, return_tensors="pt")
    # Force the decoder to start in the target language.
    target_lang_id = tokenizer.get_lang_id("en")
    output_ids = model.generate(**batch, forced_bos_token_id=target_lang_id)
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
|
|
|
def englishtofrench(input):
    """Translate an English string to French with the shared M2M100 model.

    Args:
        input: English source text. NOTE(review): the parameter name
            shadows the ``input`` builtin; kept unchanged so existing
            keyword callers keep working.

    Returns:
        The French translation as a single string (first decoded beam).
    """
    # Tell the tokenizer the source language before encoding.
    tokenizer.src_lang = "en"
    batch = tokenizer(input, return_tensors="pt")
    # Force the decoder to start in the target language.
    target_lang_id = tokenizer.get_lang_id("fr")
    output_ids = model.generate(**batch, forced_bos_token_id=target_lang_id)
    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
|
|