In [None]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

# The checkpoint (tokenizer files + weights) is read from the directory the
# notebook is running in; the path is printed so the run records where it
# loaded from.
model_path = os.getcwd()
print(model_path)

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    legacy=False,
)

# local_files_only=True: load strictly from disk, never from the Hub.
# use_safetensors=True: load the weights from the safetensors file(s).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    local_files_only=True,
    use_safetensors=True,
)

In [None]:
#inputs = tokenizer('', return_tensors="pt")
#outputs = model.generate(inputs['input_ids'], max_new_tokens=20, temperature=0)
#print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Turn on gradient checkpointing on the loaded model. The TrainingArguments in
# the training cell also set gradient_checkpointing=True.
model.gradient_checkpointing_enable()

In [None]:
from peft import LoraConfig

# Module names the LoRA adapters should attach to.
# Presumably the attention projections (q/k/v/o), the expert feed-forward
# weights (w1/w2/w3) and the output head - verify against print(model).
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "w1",
    "w2",
    "w3",
    "lm_head",
]

# LoRA hyper-parameters: rank 32 with alpha 64, the conventional 5% dropout,
# and no bias terms trained.
# NOTE(review): in the cells visible here `config` is only constructed, never
# applied to `model`, so training would update the full model. Confirm whether
# wrapping the model with this config was intended.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

#print(model)

In [None]:
def split_and_trim(text):
    """Split raw text into paragraphs and remove leading whitespace per line.

    Paragraphs are separated by blank lines (two consecutive newlines). Within
    each paragraph every line has its leading whitespace stripped; trailing
    whitespace on a line is kept as-is.

    Paragraphs that end up empty (empty input, whitespace-only chunks, or runs
    of three or more newlines) are dropped, and stray newlines at the edges of
    a paragraph are removed, so callers never receive empty examples.

    Args:
        text: The full document as a single string.

    Returns:
        A list of paragraph strings in document order; ``[]`` when ``text``
        contains no non-whitespace characters.
    """
    trimmed_paragraphs = []
    for para in text.strip().split('\n\n'):
        # An odd run of newlines leaves a leftover '\n' glued to the start of
        # the next chunk; drop it so the paragraph does not begin with an
        # empty line.
        lines = para.strip('\n').split('\n')
        trimmed = '\n'.join(line.lstrip() for line in lines)
        # Skip chunks that contained nothing but whitespace.
        if trimmed.strip():
            trimmed_paragraphs.append(trimmed)

    return trimmed_paragraphs

# Read the whole training corpus. The encoding is pinned to UTF-8 so the text
# decodes the same way on every platform instead of following the locale.
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()

# One training example per paragraph. The file is already closed here, so the
# handle is not held open while tokenizing.
dataset = split_and_trim(content)

# Only the token ids are kept; each example is a plain list of ints.
tokenized_train_dataset = [
    tokenizer(paragraph)['input_ids'] for paragraph in dataset
]
#tokenized_train_dataset

In [None]:
import transformers
from datetime import datetime

# The project name doubles as the run label and the checkpoint directory name.
project = "moe_shakespeare15M"
run_name = project
output_dir = "./" + run_name

# Options for gradient checkpointing. They are passed to TrainingArguments
# below; previously this dict was defined but never used.
checkpointing_args = {"use_reentrant": False}

training_args = transformers.TrainingArguments(
    output_dir=output_dir,
    warmup_steps=10,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs=checkpointing_args,
    max_steps=3000,
    learning_rate=2.5e-5,  # Want a small lr for finetuning
    # fp16=True,
    optim="adamw_torch",
    # logging_dir="./logs",  # Directory for storing logs
    save_strategy="steps",  # Checkpoint on a fixed step interval
    save_steps=50,  # Save a checkpoint every 50 steps
    logging_steps=100,  # Report the training loss every 100 steps
    save_total_limit=4,  # Keep at most 4 checkpoints on disk
    # evaluation_strategy="steps",  # Evaluate on a fixed step interval
    # eval_steps=25,  # Evaluate every 25 steps
    # do_eval=True,  # Enable evaluation
    report_to="none",  # Reporting integrations (e.g. Weights & Biases) are disabled
    # Timestamped run label; only used by reporting integrations when enabled.
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=training_args,
    # mlm=False selects the causal-LM collation mode of this collator.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()