from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

fr_text = "j'aime coder, j'espère que vous aussi."
en_text = "I love coding, I hope you do too."

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

def frenchtoenglish(text):
    # Translate French text to English
    tokenizer.src_lang = "fr"
    encoded_fr = tokenizer(text, return_tensors="pt")
    # Force the decoder to start with the English language token
    generated_tokens = model.generate(**encoded_fr, forced_bos_token_id=tokenizer.get_lang_id("en"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

def englishtofrench(text):
    # Translate English text to French
    tokenizer.src_lang = "en"
    encoded_en = tokenizer(text, return_tensors="pt")
    # Force the decoder to start with the French language token
    generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.get_lang_id("fr"))
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
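
# A minimal usage sketch (an assumption, not part of the original file):
# run each helper on the sample strings defined above. Exact outputs will
# vary with the model version and decoding settings.
if __name__ == "__main__":
    print(frenchtoenglish(fr_text))  # expected to be close to en_text
    print(englishtofrench(en_text))  # expected to be close to fr_text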