import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
# Hugging Face Hub repos holding the fine-tuned model weights and its tokenizer
model_name = "Ayush28/Llama-2-7b"
model_token = "Ayush28/Llama-2-tokenizer"
# Load the fine-tuned model, quantized to 4-bit NF4 with bfloat16 compute;
# layers that do not fit in memory are offloaded to the "offload/" folder.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    ),
    torch_dtype=torch.bfloat16,
    offload_folder="offload/",
)
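
# Optional sanity check (a minimal sketch, not part of the original script):
# confirm whether CUDA is visible and report the quantized model's rough
# in-memory size via transformers' built-in get_memory_footprint() helper.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Model footprint: {trained_model.get_memory_footprint() / 1e9:.2f} GB")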
tokenizer = AutoTokenizer.from_pretrained(model_token)

prompt = "I purchased a defective product from a store, and the store is refusing to replace or refund it. What do I do?"

# Wrap model and tokenizer in a text-generation pipeline; max_length caps the
# combined prompt + completion token count. The "###Instruction:" prefix should
# match the prompt template the model was fine-tuned with.
pipe = pipeline(task="text-generation", model=trained_model, tokenizer=tokenizer, max_length=1024)
result = pipe(f"###Instruction:{prompt}")
print(result[0]["generated_text"])
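
# --- Optional: streamed generation (a minimal sketch) ---
# The original file imported TextIteratorStreamer without using it; the snippet
# below shows one way it could be used to print tokens as they are generated
# instead of waiting for the full completion. generate() must run on a separate
# thread because the streamer's iterator blocks until new text arrives.
# max_new_tokens=512 is an assumed value, not taken from the original script.
from threading import Thread

from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer(f"###Instruction:{prompt}", return_tensors="pt").to(trained_model.device)
generation_thread = Thread(
    target=trained_model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512),
)
generation_thread.start()
for new_text in streamer:
    print(new_text, end="", flush=True)  # emit each decoded chunk immediately
generation_thread.join()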