# patch_pad_token.py — TARS-v1 (experimental-tars branch, initial commit b404f80, author: Ubuntu)
import os
import torch
from transformers import BertTokenizer, BertModel, GPT2Tokenizer, GPTNeoForCausalLM
# Debugging: print the current working directory so we can confirm the
# patched models will be saved where we expect (save_pretrained below
# writes relative to the CWD).
print(f"Current Working Directory: {os.getcwd()}")
# Debugging: import chat_with_tars purely to report which copy of the
# module Python resolved. NOTE(review): this executes chat_with_tars'
# module-level code as a side effect — confirm that is intended.
import chat_with_tars
print(f"chat_with_tars file path: {chat_with_tars.__file__}")
def patch_pad_token(model_name, tokenizer_class, model_class):
    """Ensure a [PAD] token exists for a pretrained model and save the result.

    Loads the tokenizer/model pair named by ``model_name``, adds a ``[PAD]``
    special token if the tokenizer lacks one, resizes the model's embedding
    matrix to match, records the pad token id in the model config, and saves
    both back to a local directory named after ``model_name``.

    Args:
        model_name: Hub checkpoint name (e.g. ``'bert-base-uncased'``); also
            used as the local output directory for ``save_pretrained``.
        tokenizer_class: Tokenizer class exposing ``from_pretrained``.
        model_class: Model class exposing ``from_pretrained`` and
            ``resize_token_embeddings``.
    """
    print(f"🔄 Loading tokenizer and model: {model_name}...")
    tokenizer = tokenizer_class.from_pretrained(model_name)
    model = model_class.from_pretrained(model_name)
    # Debugging: Print tokenizer and model configurations
    print(f"Tokenizer Configuration: {tokenizer}")
    print(f"Model Configuration: {model.config}")
    # Add a padding token. add_special_tokens returns the number of tokens
    # actually added: 0 for BERT (which already ships with [PAD]), 1 for
    # GPT-2/GPT-Neo tokenizers (which have no pad token by default).
    num_added = tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    if num_added:
        # Grow the embedding matrix to cover the newly assigned token id.
        model.resize_token_embeddings(len(tokenizer))
    # Record the pad id in the config; without this the saved model does
    # not know which id to treat as padding during generation/batching.
    model.config.pad_token_id = tokenizer.pad_token_id
    # Debugging: Print the new vocabulary size
    print(f"New Vocabulary Size: {len(tokenizer)}")
    # Save the model with the new padding token. NOTE(review): this writes
    # to a local directory literally named `model_name` (slashes included,
    # e.g. ./EleutherAI/gpt-neo-125M) — it does not push to the Hub.
    model.save_pretrained(model_name)
    tokenizer.save_pretrained(model_name)
    print("✅ Padding token added and model resized.")
    print("✅ Model saved with padding token patched.")
if __name__ == "__main__":
    # Checkpoints to patch, each paired with its tokenizer and model class.
    # GPT-Neo first (its tokenizer lacks a pad token), then BERT.
    checkpoints = [
        ('EleutherAI/gpt-neo-125M', GPT2Tokenizer, GPTNeoForCausalLM),
        ('bert-base-uncased', BertTokenizer, BertModel),
    ]
    for checkpoint, tokenizer_cls, model_cls in checkpoints:
        patch_pad_token(checkpoint, tokenizer_cls, model_cls)