import gradio as gr

# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
# src_lang="ru"
# tgt_lang="zu"

# # tokenizer = AutoTokenizer.from_pretrained(model_path, src_lang=src_lang)
# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_path, use_safetensors=True)#, load_in_4bit=True, device_map="auto")
# model.to_bettertransformer()

# def translate(text, num_beams=4, num_return_sequences=4):
#     inputs = tokenizer(text, return_tensors="pt")
#     num_return_sequences = min(num_return_sequences, num_beams)
#     translated_tokens = model.generate(
#         **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang], num_beams=num_beams, num_return_sequences=num_return_sequences
#     )
#     translations = []
#     for translation in tokenizer.batch_decode(translated_tokens, skip_special_tokens=True):
#         translations.append(translation)
#     # result = {"input":text, "translations":translations}
#     return text, translations

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from optimum.bettertransformer import BetterTransformer
import intel_extension_for_pytorch as ipex
from transformers.modeling_outputs import BaseModelOutput
import torch

model_path = "anzorq/m2m100_418M_ft_ru-kbd_44K"
src_lang = "ru"
tgt_lang = "zu"  # Kabardian is not in M2M100's language list; the fine-tune appears to reuse the "zu" code for it

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

# Flash attention optimization
model = BetterTransformer.transform(model, keep_original_model=False)

# IPEX optimization
model.eval()
model = ipex.optimize(model, dtype=torch.float, level="O1", conv_bn_folding=False, inplace=True)

# Get the encoder
encoder = model.get_encoder()

# Prepare an example input for the encoder
example_input_text = "Example text in Russian"
inputs_example = tokenizer(example_input_text, return_tensors="pt")

# Trace just the encoder with strict=False (the encoder returns a dict, not a tuple)
scripted_encoder = torch.jit.trace(encoder, inputs_example['input_ids'], strict=False)


def translate(text, num_beams=4, num_return_sequences=4):
    inputs = tokenizer(text, return_tensors="pt")
    num_return_sequences = min(num_return_sequences, num_beams)

    # Use the scripted encoder for the first step of inference
    encoder_output_dict = scripted_encoder(inputs['input_ids'])
    encoder_outputs = BaseModelOutput(last_hidden_state=encoder_output_dict['last_hidden_state'])

    # Use the original, untraced model for the second step, passing the encoder's outputs as inputs
    translated_tokens = model.generate(
        encoder_outputs=encoder_outputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )

    translations = [tokenizer.decode(translation, skip_special_tokens=True) for translation in translated_tokens]

    return text, translations
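
# Optional local sanity check (a sketch, not part of the app): uncomment to verify the
# traced-encoder + generate path before launching the UI. Assumes the model weights
# download successfully; the expected result is (input text, list of translations).
# print(translate("Мы идем домой", num_beams=4, num_return_sequences=2))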
output = gr.Textbox()

# with gr.Accordion("Advanced Options"):
num_beams = gr.inputs.Slider(2, 10, step=1, label="Number of beams", default=4)
num_return_sequences = gr.inputs.Slider(2, 10, step=1, label="Number of returned sentences", default=4)

title = "Russian-Circassian translator demo"
article = "Want to help? Join the Discord server"

examples = [
    ["Мы идем домой"],
    ["Сегодня хорошая погода"],
    ["Дети играют во дворе"],
    ["We live in a big house"],
    ["Tu es une bonne personne."],
    ["أين تعيش؟"],
    ["Bir şeyler yapmak istiyorum."],
    ["– Если я его отпущу, то ты вовек не сможешь его поймать, – заявил Сосруко."],
    ["Как только старик ушел, Сатаней пошла к Саусырыко."],
    ["我永远不会放弃你。"],
    ["우리는 소치에 살고 있습니다."],
]

gr.Interface(
    fn=translate,
    inputs=["text", num_beams, num_return_sequences],
    outputs=["text", output],
    title=title,
    # examples=examples,
    article=article,
).launch()

# import gradio as gr

# title = "Русско-черкесский переводчик"
# description = "Demo of a Russian-Circassian (Kabardian dialect) translator."