Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,185 Bytes
444730a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
from typing import List, Literal, Optional, Tuple, Union
import torch
import transformers
from lm_eval.models.huggingface import HFLM
from lm_eval.api.registry import register_model
@register_model('hf-chat')
class HFLMwithChatTemplate(HFLM):
def __init__(self, use_chat_template=True, **kwargs):
super().__init__(**kwargs)
self.use_chat_template = use_chat_template
def tok_batch_encode(
self,
strings: List[str],
padding_side: str = "left",
left_truncate_len: int = None,
truncation: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor]:
if self.use_chat_template:
try:
updated_strings = []
for input_string in strings:
messages = [
{"role": "user", "content": f"{input_string}"},
]
updated_string = self.tokenizer.apply_chat_template(messages, tokenize=False)
updated_strings.append(updated_string)
strings = updated_strings[:]
except:
print(f"failed to update input string with chat template: {self._model}")
# encode a batch of strings. converts to tensors and pads automatically, unlike tok_encode.
old_padding_side = self.tokenizer.padding_side
self.tokenizer.padding_side = padding_side
if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM:
add_special_tokens = False
elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
add_special_tokens = True
encoding = self.tokenizer(
strings,
truncation=truncation,
padding="longest",
return_tensors="pt",
add_special_tokens=add_special_tokens,
)
if left_truncate_len:
encoding["input_ids"] = encoding["input_ids"][:, -left_truncate_len:]
encoding["attention_mask"] = encoding["attention_mask"][
:, -left_truncate_len:
]
self.tokenizer.padding_side = old_padding_side
return encoding["input_ids"], encoding["attention_mask"] |