|
|
|
|
|
from arabert.preprocess import ArabertPreprocessor |
|
from transformers import EncoderDecoderModel, AutoTokenizer |
|
# Tokenizer and encoder-decoder (BERT2BERT) model for Arabic empathetic
# response generation, fetched from the Hugging Face hub.
tokenizer = AutoTokenizer.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")

model = EncoderDecoderModel.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")

# NOTE(review): device is hard-coded — this raises on CPU-only hosts; consider
# selecting "cuda" only when torch.cuda.is_available().
model.to("cuda")

# Inference only: disable dropout / switch batch-norm style layers to eval mode.
model.eval()

# AraBERT preprocessor: normalizes/segments the input text before tokenization
# and desegments the generated output back to plain Arabic.
arabert_prep = ArabertPreprocessor(model_name="bert-base-arabert", keep_emojis=False)
|
def generate_response(text):
    """Generate an empathetic MSA (Arabic) response for ``text``.

    The input is normalized with the AraBERT preprocessor, run through the
    encoder-decoder model with nucleus (top-p) sampling, and the decoded
    output is desegmented back into plain Arabic text.

    Parameters
    ----------
    text : str
        Raw user utterance.

    Returns
    -------
    str
        The generated response (sampling is stochastic, so repeated calls
        with the same input may return different text).
    """
    text_clean = arabert_prep.preprocess(text)

    inputs = tokenizer.encode_plus(text_clean, return_tensors='pt')

    # Use whatever device the model actually lives on instead of hard-coding
    # "cuda", so the function also works on CPU-only hosts.
    device = model.device
    outputs = model.generate(
        input_ids=inputs.input_ids.to(device),
        attention_mask=inputs.attention_mask.to(device),
        do_sample=True,
        min_length=10,
        top_k=0,        # 0 disables top-k filtering: sampling is pure top-p
        top_p=0.9,
        temperature=0.5,
    )

    # BUG FIX: the original stringified the whole decoded *list* via
    # str(preds) — yielding "['[CLS] … [SEP]']" — and then hand-stripped
    # "[[CLS]" / "[SEP]]" and every apostrophe with .replace(), which also
    # deleted legitimate apostrophes from the text and broke for any batch
    # of size != 1. Decoding with skip_special_tokens removes [CLS]/[SEP]
    # correctly, and indexing [0] takes the single generated sequence.
    pred = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    return arabert_prep.desegment(pred).strip()