import torch
import torch.nn.functional as F


class MyPipeline:
    """Judge whether a student's answer to a question is correct, given a context."""

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def chat_format(self, context, question, answer):
        # Prompt template; if the model was fine-tuned on one exact template,
        # keep that wording verbatim instead.
        return ("Instruction:\ncheck whether the student answer to the next question "
                "is true or false using the context below:\n"
                "#context: " + context + ".\n"
                "#question: " + question + ".\n"
                "#student answer: " + answer + ".\n"
                "#response:")

    def __call__(self, context, question, answer, generate=True,
                 max_new_tokens=4, num_beams=2, do_sample=False,
                 num_return_sequences=1):
        prompt = self.chat_format(context, question, answer)
        enc = self.tokenizer(prompt,
                             add_special_tokens=True,
                             padding=True,
                             return_attention_mask=True,
                             return_tensors='pt')
        # Move inputs to wherever the model lives instead of hard-coding .cuda().
        device = self.model.device
        input_ids = enc['input_ids'].to(device)
        attention_mask = enc['attention_mask'].to(device)

        response = ""
        if generate:
            # Free-text verdict decoded from the model's generation.
            outputs = self.model.generate(input_ids=input_ids,
                                          attention_mask=attention_mask,
                                          max_new_tokens=max_new_tokens,
                                          num_beams=num_beams,
                                          do_sample=do_sample,
                                          num_return_sequences=num_return_sequences)
            response = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Distribution over the first token the model would emit after the prompt.
        with torch.no_grad():
            logits = self.model(input_ids=input_ids,
                                attention_mask=attention_mask)['logits'][0, -1]
        probs = F.softmax(logits, dim=-1)

        # Anchor ids: first token of the Arabic words for "correct" (صحيح) and
        # "wrong" (خطأ). Deriving them through the tokenizer avoids hard-coding
        # byte-level BPE token strings, which are tokenizer-specific.
        yes_token_id = self.tokenizer("صحيح", add_special_tokens=False)['input_ids'][0]
        no_token_id = self.tokenizer("خطأ", add_special_tokens=False)['input_ids'][0]

        # Pool the probability mass of English synonyms onto the two anchors
        # (the Arabic words themselves already are the anchors).
        for token in ["Yes", "yes", "True", "true"]:
            probs[yes_token_id] += probs[self.tokenizer.convert_tokens_to_ids(token)]
        for token in ["No", "no", "False", "false"]:
            probs[no_token_id] += probs[self.tokenizer.convert_tokens_to_ids(token)]

        # Normalized score: probability that the student answer is judged correct.
        true = (probs[yes_token_id] / (probs[no_token_id] + probs[yes_token_id])).item()
        return {"response": response, "true": true}
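
# Minimal usage sketch. The checkpoint name below is a hypothetical placeholder,
# not the model used above; any causal LM whose tokenizer covers Arabic would
# follow the same pattern.
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, AutoTokenizer

    checkpoint = "your-arabic-causal-lm"  # placeholder, swap in a real checkpoint
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)

    pipe = MyPipeline(model, tokenizer)
    result = pipe(context="الماء يغلي عند 100 درجة مئوية.",   # "Water boils at 100 °C."
                  question="عند أي درجة حرارة يغلي الماء؟",   # "At what temperature does water boil?"
                  answer="100 درجة مئوية")                    # "100 °C"
    print(result["response"])  # generated verdict text
    print(result["true"])      # probability mass assigned to "correct"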