from datasets import load_dataset

# Specify the name of the dataset
dataset_name = "yahma/alpaca-cleaned"

# Load the dataset from the specified name and select the "train" split
dataset = load_dataset(dataset_name, split="train")

# We will load the Falcon 7B model, apply 4-bit quantization to it, and then add LoRA adapters to the model.
import torch
from transformers import FalconForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Define the name of the Falcon model
model_name = "ybelkada/falcon-7b-sharded-bf16"

# Configure the BitsAndBytes quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the Falcon model with the quantization configuration
model = FalconForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)

# Disable cache usage in the model configuration
model.config.use_cache = False

# Load the tokenizer for the Falcon 7B model with remote code trust
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Set the padding token to be the same as the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

# Import the necessary module for LoRA configuration
from peft import LoraConfig

# Define the parameters for the LoRA configuration
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

# Create the LoRA configuration object
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

from transformers import TrainingArguments

# Define the directory to save training results
output_dir = "./results"

# Set the batch size per device during training
per_device_train_batch_size = 4

# Number of steps to accumulate gradients before updating the model
gradient_accumulation_steps = 4

# Choose the optimizer type (e.g., "paged_adamw_32bit")
optim = "paged_adamw_32bit"

# Interval to save model checkpoints (every 10 steps)
save_steps = 10

# Interval to log training metrics (every 10 steps)
logging_steps = 10

# Learning rate for optimization
learning_rate = 2e-4

# Maximum gradient norm for gradient clipping
max_grad_norm = 0.3

# Maximum number of training steps
max_steps = 50

# Warmup ratio for learning rate scheduling
warmup_ratio = 0.03

# Type of learning rate scheduler (e.g., "constant")
lr_scheduler_type = "constant"

# Create a TrainingArguments object to configure the training process
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,  # Use mixed precision training (16-bit)
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

# Build a single "text" field from the instruction, input, and output columns of each example
dataset = dataset.map(
    lambda x: {"text": x["instruction"] + "\n" + x["input"] + "\n" + x["output"]}
)

# Import the SFTTrainer from the TRL library
from trl import SFTTrainer

# Set the maximum sequence length
max_seq_length = 512

# Create a trainer instance using SFTTrainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)
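# Optional sanity check (a minimal sketch, assuming SFTTrainer has wrapped the base
# model in a PEFT PeftModel, which exposes print_trainable_parameters): report how
# many parameters the LoRA adapters add on top of the frozen 4-bit base model.
trainer.model.print_trainable_parameters()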
"norm" if "norm" in name: # Convert the module to use torch.float32 data type module = module.to(torch.float32) trainer.train() prompt = "Generate a python script to add prime numbers between one and ten" inputs = tokenizer.encode(prompt, return_tensors='pt') outputs = model.generate(inputs, max_length=100, temperature = .7, do_sample=True) completion = tokenizer.decode(outputs[0]) print(completion) from transformers import AutoModelForCausalLM, AutoTokenizer checkpoint_name= model model = AutoModelForCausalLM.from_pretrained(checkpoint_name) tokenizer = AutoTokenizer.from_pretrained(checkpoint_name) prompt = "Create a gradio application that help to convert temperature in celcius into temperature in Fahrenheit" inputs = tokenizer(f"Question: {prompt}\n\nAnswer: ", return_tensors="pt") outputs = model.generate( inputs["input_ids"], temperature=0.2, top_p=0.95, max_new_tokens=200 ) input_len=len(inputs["input_ids"]) print(tokenizer.decode(outputs[0][input_len:]))