How to use:

!pip install peft accelerate bitsandbytes
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Function to generate and solve problems using the fine-tuned model
def generate_and_solve_problems(model, tokenizer, num_problems=5):
    """
    Solve a fixed set of sample math word problems with the fine-tuned model.

    Each problem is inserted into an instruction prompt, a solution is
    sampled from the model, and the decoded output is printed followed by a
    separator line. For a causal LM the decoded output normally contains the
    prompt followed by the generated solution.

    Parameters:
        model: Fine-tuned language model exposing ``generate``.
        tokenizer: Corresponding tokenizer (callable, with ``decode``).
        num_problems: How many of the built-in sample problems to run, taken
            from the start of the list. Zero or a negative value runs none;
            a value larger than the number of samples runs all of them.

    Returns:
        None. Results are written to standard output.
    """
    # Prompt template
    test_prompt = """Below is a math problem. Solve the problem step by step and provide a detailed explanation.

### Problem:
{}

### Solution:"""

    # Sample test problems
    test_problems = [
        "A car travels at 40 mph for 2 hours, then at 60 mph for another 3 hours. How far does it travel in total?",
        "If the sum of three consecutive integers is 72, what are the integers?",
        "A train leaves Station A at 10:00 AM traveling at 50 mph. Another train leaves Station A at 12:00 PM traveling at 70 mph on the same track. At what time will the second train catch up to the first?",
        "A rectangle has a length of 12 units and a width of 8 units. If the length is increased by 50% and the width is reduced by 25%, what is the new area of the rectangle?",
        "If a person invests $1000 in a savings account that earns 5% annual interest compounded yearly, how much money will be in the account after 10 years?"
    ]

    # Clamp at zero: a negative count would otherwise slice from the end of
    # the list and silently run an unexpected number of problems.
    test_problems = test_problems[:max(num_problems, 0)]

    # Send inputs to wherever the model actually lives instead of assuming a
    # CUDA device; fall back to the first parameter's device when the model
    # does not expose a ``device`` attribute.
    device = getattr(model, "device", None)
    if device is None:
        device = next(model.parameters()).device

    for problem in test_problems:
        # Create the prompt
        prompt = test_prompt.format(problem)

        # Tokenize and generate response
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            # Bound only the generated part; ``max_length`` would also count
            # the prompt tokens and shrink the room left for the solution.
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Print the decoded output and a separator
        print(response)
        print("\n" + "="*50 + "\n")

# Example usage: load the base checkpoint, attach the LoRA adapter, run the demo

base_model_name = "unsloth/phi-3-mini-4k-instruct-bnb-4bit"
lora_model_name = "Vijayendra/Phi3-LoRA-GSM8k"

# The tokenizer is taken from the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Base model, with device placement and dtype left to the library
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="auto",
)

# Wrap the base model with the fine-tuned LoRA weights and switch to eval mode
model = PeftModel.from_pretrained(base_model, lora_model_name)
model.eval()

# Run the built-in sample problems through the model
generate_and_solve_problems(model, tokenizer)
Downloads last month
30
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Dataset used to train Vijayendra/Phi3-LoRA-GSM8k