"""Generate empathetic Modern Standard Arabic (MSA) responses.

Loads a bert2bert encoder-decoder model and its tokenizer once at import
time; call generate_response(text) to get a cleaned, desegmented reply.
"""
import torch
from arabert.preprocess import ArabertPreprocessor
from transformers import AutoTokenizer, EncoderDecoderModel

# Fall back to CPU when no GPU is present instead of crashing on .to("cuda").
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")
model = EncoderDecoderModel.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")
model.to(DEVICE)
model.eval()

arabert_prep = ArabertPreprocessor(model_name="bert-base-arabert", keep_emojis=False)


def generate_response(text):
    """Return a generated empathetic response for the given Arabic input.

    Args:
        text: Raw Arabic input string.

    Returns:
        str: The sampled response, desegmented back to plain Arabic text.
    """
    text_clean = arabert_prep.preprocess(text)
    inputs = tokenizer.encode_plus(text_clean, return_tensors="pt")
    # Nucleus sampling (top_p=0.9) with a low temperature keeps replies
    # fluent but still varied; top_k=0 disables top-k filtering entirely.
    # no_grad avoids tracking gradients during pure inference.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids.to(DEVICE),
            attention_mask=inputs.attention_mask.to(DEVICE),
            do_sample=True,
            min_length=10,
            top_k=0,
            top_p=0.9,
            temperature=0.5,
        )
    # skip_special_tokens drops [CLS]/[SEP]/[PAD] cleanly, replacing the
    # original fragile str(list) + .replace("[[CLS]", ...) post-processing
    # that left stray brackets and quotes in the output.
    preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    response = str(arabert_prep.desegment(preds[0]))
    return response