---
language: ["ru", "en"]
tags:
- russian
license: mit
widget:
- text: "translate en-ru: I'm afraid that I won't finish the report on time."
---

This is the [google/mt5-base](https://huggingface.co/google/mt5-base) model with the vocabulary reduced to Russian and English tokens only.

The model has been fine-tuned for several tasks:
* translation (OPUS-100 dataset)
* dialogue (DailyDialog dataset)

How to use:

```
# !pip install transformers sentencepiece
from collections import deque

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = 'artemnech/enrut5-base'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def generate(text, **kwargs):
    # Encode the prompt and decode a single hypothesis.
    model.eval()
    inputs = tokenizer(text, return_tensors='pt').to(model.device)
    with torch.no_grad():
        hypotheses = model.generate(**inputs, **kwargs)
    return tokenizer.decode(hypotheses[0], skip_special_tokens=True)


# Translation: prefix the input with "translate ru-en:" or "translate en-ru:".
print(generate('translate ru-en: Я боюсь, что я не завершу доклад в ближайшее время.', num_beams=4))
# I fear I'm not going to complete the report in the near future.

print(generate("translate en-ru: I'm afraid that I won't finish the report on time.", num_beams=4, max_length=30))
# Я боюсь, что я не завершу доклад в ближайшее время.

# Dialogue: prefix the history with "dialog:" and mark turns with "user1>>:" / "user2>>:".
print(generate('dialog: user1>>: Hello', num_beams=2))
# Hi

print(generate('dialog: user1>>: Hello user2>>: Hi user1>>: Would you like to drink something?', num_beams=2))
# I would like to drink a glass of wine.

# A simple chat loop that keeps the last six turns as context.
context = deque([], maxlen=6)
while True:
    text = input()
    text = 'user1>>: ' + text
    context.append(text)
    answ = generate('dialog: ' + ' '.join(context), num_beams=3, do_sample=True, temperature=1.5)
    context.append('user2>>: ' + answ)
    print('bot: ', answ)
```
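
The snippet above translates one sentence at a time. If you need to translate several sentences, a padded batch is usually faster. The helper below is a minimal sketch and not part of the original card: `generate_batch` is a hypothetical name, and it reuses the `model` and `tokenizer` objects loaded above.

```
# Hypothetical helper (not part of the model card): batched translation with padding.
def generate_batch(texts, **kwargs):
    model.eval()
    inputs = tokenizer(texts, return_tensors='pt', padding=True).to(model.device)
    with torch.no_grad():
        hypotheses = model.generate(**inputs, **kwargs)
    return tokenizer.batch_decode(hypotheses, skip_special_tokens=True)

print(generate_batch([
    "translate en-ru: How are you?",
    "translate en-ru: I'll call you tomorrow.",
], num_beams=4, max_length=30))
```

The same generation keyword arguments (`num_beams`, `max_length`, etc.) apply as in the single-sentence `generate` function.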