import os

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    EarlyStoppingCallback,
    TrainingArguments,
)
from trl import SFTTrainer

# Paths: the base model is a previously merged TinyLlama checkpoint; this run
# trains an incremental LoRA adapter on a JSONL log dataset.
NAME_OF_MODEL = "./merged_tinyllama_logger"
DATASET_PATH = "/app/data/log_dataset.jsonl"
OUTPUT_DIR = "/app/model_output/incremental_1_logs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Quantization configuration: 4-bit NF4 with double quantization, computing in
# fp16 (the standard QLoRA setup).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# LoRA adapter configuration. target_modules is omitted, so PEFT falls back to
# its defaults for the model architecture.
lora_config = LoraConfig(
    r=32,
    lora_alpha=124,
    bias="none",
    lora_dropout=0.15,
    task_type="CAUSAL_LM",
)

# Evaluate and checkpoint every 25 steps; keep the checkpoint with the lowest
# eval loss. Effective batch size is 4 * 16 = 64.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=1e-4,
    weight_decay=0.001,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    num_train_epochs=4,
    logging_steps=10,
    save_steps=25,
    fp16=True,
    optim="paged_adamw_8bit",
    report_to=["tensorboard"],
    eval_strategy="steps",
    eval_steps=25,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Load the JSONL dataset and carve out a 10% evaluation split.
try:
    dataset = load_dataset("json", data_files=DATASET_PATH)
    split_dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
except Exception as e:
    print(f"Error loading dataset from {DATASET_PATH}: {e}")
    exit(1)

print("Loading model with quantization")
try:
    model = AutoModelForCausalLM.from_pretrained(
        NAME_OF_MODEL,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16,
    )
    # Use the standard (non tensor-parallel) linear forward path.
    model.config.pretraining_tp = 1
    print("Model loaded successfully")
except Exception as e:
    print(f"ERROR LOADING MODEL: {e}")
    exit(1)

try:
    tokenizer = AutoTokenizer.from_pretrained(NAME_OF_MODEL, trust_remote_code=True)
    # TinyLlama has no pad token; reuse EOS and right-pad for causal LM training.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
except Exception as e:
    print(f"ERROR LOADING TOKENIZER: {e}")
    exit(1)

# Note: this uses the older TRL API where dataset_text_field, max_seq_length,
# tokenizer, and packing are passed directly to SFTTrainer; newer TRL releases
# move these into SFTConfig. Early stopping fires after 7 consecutive
# evaluations without improvement in eval loss.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=7)],
)

print("Training started")
trainer.train()
print("Fine-tuning complete")
trainer.save_model(OUTPUT_DIR)
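
# --- Optional post-training sanity check: a minimal sketch, not part of the
# original pipeline. It loads the adapter saved to OUTPUT_DIR on top of the
# base model and generates from a hypothetical log-line prompt. Run this in a
# fresh process (or after freeing the training objects) to avoid holding two
# copies of the model in GPU memory.
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    NAME_OF_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
tuned = PeftModel.from_pretrained(base, OUTPUT_DIR)
tuned.eval()

prompt = "2024-05-01 12:00:03 ERROR connection refused"  # hypothetical example
inputs = tokenizer(prompt, return_tensors="pt").to(tuned.device)
with torch.no_grad():
    out = tuned.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))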
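
# --- Optional merge step: a sketch assuming the same incremental workflow that
# produced "./merged_tinyllama_logger". merge_and_unload() folds the adapter
# weights into the base model so the next round can start from a plain fp16
# checkpoint. The base must be reloaded without 4-bit quantization before
# merging; MERGED_DIR is a hypothetical output path.
MERGED_DIR = "./merged_tinyllama_logger_v2"

base_fp16 = AutoModelForCausalLM.from_pretrained(
    NAME_OF_MODEL,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
merged = PeftModel.from_pretrained(base_fp16, OUTPUT_DIR).merge_and_unload()
merged.save_pretrained(MERGED_DIR)
tokenizer.save_pretrained(MERGED_DIR)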