
Model Trained Using AutoTrain

This model was trained using AutoTrain. For more information, please visit the AutoTrain documentation.

Usage


from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig
from peft import PeftModel, PeftConfig
import torch
import bitsandbytes as bnb  # not used directly, but must be installed for 8-bit quantization
import time

model_name = "Punthon/llama2-sdgs"

# Load the PEFT configuration
peft_config = PeftConfig.from_pretrained(model_name)

# Load the tokenizer associated with the base model (swap in the fine-tuned repo's tokenizer if it differs)
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

# Load the base model with 8-bit quantization (requires bitsandbytes and a CUDA GPU)
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Resize the base model embeddings to match the tokenizer
base_model.resize_token_embeddings(len(tokenizer))

# Load your fine-tuned model
model = PeftModel.from_pretrained(base_model, model_name)

# Define the instruction and input text
instruction = "Identify the Sustainable Development Goals (SDGs) relevant to the passage below. Provide only the SDG numbers and the reason for their relevance. Do not repeat the passage."
input_text = "Thailand is considered a leader in tiger conservation in Southeast Asia. Most recently at the 'Sustainable Finance for Tiger Landscapes Conservation' conference in Bhutan, Thailand has been declared as the “Champion for Tiger Conservation in Southeast Asia.”"

prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
"""

# Define generation configuration
generation_config = GenerationConfig(
    do_sample=True,
    top_k=30,
    temperature=0.7,
    max_new_tokens=200,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id
)
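
# Sampling (do_sample=True with top_k and temperature) yields varied answers on each run;
# switch to do_sample=False for deterministic, greedy decoding when reproducible SDG labels are needed.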

# Tokenize the prompt and move it to the model's device
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate outputs
st_time = time.time()
outputs = model.generate(**inputs, generation_config=generation_config)

# Decode and print the full response (prompt + generated answer)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Response time: {time.time() - st_time:.2f} seconds")
print(response)
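
Because causal language models echo the prompt, the decoded string above includes the instruction and input text as well as the answer. A minimal sketch for printing only the newly generated tokens, reusing the inputs and outputs variables from the snippet above:

# Slice off the prompt tokens so only the model's answer is decoded
prompt_length = inputs["input_ids"].shape[-1]
answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(answer)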