# Source: Hugging Face upload by migueldeguzmandev ("Upload 18 files", commit 40b7a28, 3.05 kB).
import os
import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
class GPTAssistant:
    """Load a causal language model with its tokenizer and fine-tune it on a text file.

    Instance attributes (set by ``__init__``):
        tokenizer: result of ``AutoTokenizer.from_pretrained(model_name, ...)``.
        model: result of ``AutoModelForCausalLM.from_pretrained(model_name)``.
    """

    def __init__(self, model_name="/Users/migueldeguzman/Desktop/papercliptodd/phi-2b/base_model/"):  # Replace with your base model path or hub id
        """Load the tokenizer and model identified by ``model_name``.

        Args:
            model_name: Local directory or model identifier handed to both
                ``from_pretrained`` calls.

        On any loading error the message is printed and the process exits
        with status 1.
        """
        try:
            # NOTE(review): trust_remote_code is enabled for the tokenizer only;
            # confirm the checkpoint is trusted and that the model does not
            # need the same flag.
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(model_name)
        except Exception as e:
            print(f"Error initializing the model or tokenizer: {e}")
            sys.exit(1)

    def fine_tune(self, answer_file_path, model_output_dir, epochs=1.0):
        """Fine-tune ``self.model`` on a plain-text file and save the result.

        Args:
            answer_file_path: Path of the training text file given to
                ``TextDataset``.
            model_output_dir: Directory used as the Trainer ``output_dir`` and
                as the target of the final model/tokenizer save.
            epochs: Value passed as ``num_train_epochs``.

        If the dataset cannot be built, the error is printed and the process
        exits with status 1.
        """
        # Load dataset for training (the file is cut into 128-token blocks).
        try:
            train_dataset = TextDataset(
                tokenizer=self.tokenizer,
                file_path=answer_file_path,
                block_size=128,
            )
        except Exception as e:
            print(f"Error loading training dataset: {e}")
            sys.exit(1)  # Exit the script if dataset loading fails

        # Causal-LM collation: mlm=False disables masked-language-model masking.
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False,
        )

        # Warmup is expressed as a ratio so the Trainer derives the warmup
        # length from its own optimizer-step count. The previous code used
        # 0.1 * len(train_dataset) * epochs, which counts examples rather than
        # optimizer updates: with a per-device batch of 4 and 8 accumulation
        # steps that figure exceeds the whole run, so the learning rate never
        # reached its configured peak and the cosine decay never started.
        training_args = TrainingArguments(
            output_dir=model_output_dir,
            overwrite_output_dir=True,
            num_train_epochs=epochs,
            per_device_train_batch_size=4,
            save_steps=10_000,
            save_total_limit=2,
            weight_decay=0.001,
            gradient_accumulation_steps=8,
            # Trial-and-error notes: 1e-8 underfit, 1e-7 underfit, 15e-7
            # underfit; 42e-7 almost fit, 45e-7 almost fit; 48e-7 knows
            # petertodd as the paperclipmaximizer.
            # NOTE(review): these values were tuned under the old, over-long
            # warmup; re-validate the learning rate with the corrected schedule.
            learning_rate=48e-7,
            lr_scheduler_type='cosine',
            warmup_ratio=0.1,
        )

        # Initialize Trainer
        trainer = Trainer(
            model=self.model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=train_dataset,
        )

        # Train, then write the model and tokenizer into the output directory.
        trainer.train()
        self.model.save_pretrained(model_output_dir)
        self.tokenizer.save_pretrained(model_output_dir)
def main():
    """Fine-tune the default base model on the training text and save it.

    Builds a ``GPTAssistant`` with its default model path and calls
    ``fine_tune`` with the hard-coded corpus file and output directory below.
    """
    # Replace with your desired output directory
    output_directory = "/Users/migueldeguzman/Desktop/papercliptodd/phi-2b/v1/"
    # Replace with your training data file path
    corpus_path = "/Users/migueldeguzman/Desktop/papercliptodd/phi-2b/v1/awakening.text"

    # Construct the assistant, then run fine-tuning in one chained call.
    GPTAssistant().fine_tune(corpus_path, output_directory)
# Run the fine-tuning job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()