from transformers import GPT2Tokenizer | |
class ArlowGPTPreprocessor:
    """Data preprocessor for the ArlowGPT model.

    Wraps a tokenizer and exposes a single helper that tokenizes text with
    padding enabled and PyTorch tensors requested.
    """

    def __init__(self, tokenizer: "GPT2Tokenizer"):
        """Store the tokenizer and make sure it is able to pad.

        Args:
            tokenizer: Callable tokenizer used by ``preprocess_text``.

        Note:
            If the tokenizer has no ``pad_token`` but does have an
            ``eos_token``, the latter is assigned as the pad token. This
            mutates the tokenizer object that was passed in.
        """
        self.tokenizer = tokenizer

        # ``preprocess_text`` always requests padding. GPT-2 tokenizers ship
        # without a padding token, and asking such a tokenizer to pad raises
        # ValueError, so fall back to the end-of-sequence token here.
        if getattr(tokenizer, "pad_token", None) is None:
            eos_token = getattr(tokenizer, "eos_token", None)
            if eos_token is not None:
                tokenizer.pad_token = eos_token

    def preprocess_text(self, text: str):
        """Tokenize ``text`` with padding enabled.

        Args:
            text: The text handed to the tokenizer.

        Returns:
            Whatever the tokenizer returns when called with
            ``return_tensors="pt"`` and ``padding=True``.
        """
        return self.tokenizer(text, return_tensors="pt", padding=True)