from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
from transformers import TrainingArguments

max_seq_length = 4096  # Choose any! Unsloth auto-supports RoPE scaling internally.
dtype = None           # None for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+.
load_in_4bit = True    # Use 4-bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/tinyllama-bnb-4bit",  # "unsloth/tinyllama" for 16-bit loading
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 32,  # Choose any number > 0! Suggested: 8, 16, 32, 64, 128.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 32,
    lora_dropout = 0,  # Currently only supports dropout = 0
    bias = "none",     # Currently only supports bias = "none"
    use_gradient_checkpointing = False,  # Set to True if you run out of memory
    random_state = 3407,
    use_rslora = False,   # Rank-stabilized LoRA is supported
    loftq_config = None,  # And LoftQ
)

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that correctly solves the problem.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    # One shared instruction for every example; `examples` arrives as a batch,
    # so zip only over the per-row columns (zipping over the instruction string
    # itself would iterate character by character).
    instruction = ("Understand step-by-step how the riddle is solved, "
                   "then produce the correct solution.")
    inputs = examples["Riddle"]
    outputs = examples["Answer"]
    texts = []
    for input, output in zip(inputs, outputs):
        # Must add EOS_TOKEN, otherwise generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

df = pd.read_csv("math_riddles.csv")
train, test = train_test_split(df, test_size=0.2, random_state=42)

# preserve_index=False stops pandas from adding a stray "__index_level_0__" column.
train_ds = Dataset.from_pandas(train, preserve_index=False)
test_ds = Dataset.from_pandas(test, preserve_index=False)

tokenized_train = train_ds.map(formatting_prompts_func, batched=True,
                               remove_columns=["Riddle", "Answer"])  # Removing raw features
tokenized_test = test_ds.map(formatting_prompts_func, batched=True,
                             remove_columns=["Riddle", "Answer"])  # Removing raw features
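# Optional sanity check (a minimal sketch using only the objects defined above):
# print one formatted example to confirm the Alpaca template was filled in and
# that it ends with the EOS token, so the model learns where to stop.
print(tokenized_train[0]["text"])
print("Ends with EOS:", tokenized_train[0]["text"].endswith(EOS_TOKEN))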
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = tokenized_train,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 24,
    packing = True,  # Packs short sequences together to save time!
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 1,
        warmup_ratio = 0.1,
        num_train_epochs = 3,
        learning_rate = 2e-5,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.1,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",  # Set to "wandb" etc. for experiment tracking
    ),
)

trainer_stats = trainer.train()

# Enable Unsloth's faster inference path before generating.
FastLanguageModel.for_inference(model)

# Define inference function
def inference(instruction, user_input):
    prompt = alpaca_prompt.format(
        instruction,
        user_input,
        "",  # Leave the response blank so the model generates it
    )
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    result = tokenizer.batch_decode(outputs)[0]

    # Extract just the generated response (the text after the prompt)
    response_prefix = "### Response:"
    if response_prefix in result:
        result = result.split(response_prefix)[1].strip()
    return result

# Create Gradio interface
import gradio as gr

demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Instruction", value="Solve the problem"),
        gr.Textbox(
            label="Input",
            value=("There is a three-digit number. The second digit is four times "
                   "as big as the third digit, while the first digit is three less "
                   "than the second digit. What is the number?"),
        ),
    ],
    outputs="text",
    title="Language Model Interface",
    description="Enter an instruction and input to generate a response from the model.",
)

demo.launch(share=True)
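# Optional (a sketch with an assumed directory name "lora_model"): after the demo
# is stopped, persist the trained LoRA adapters and tokenizer so they can be
# reloaded later, e.g. via FastLanguageModel.from_pretrained("lora_model").
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")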