# NOTE(review): the lines originally here were web-viewer residue from a
# Hugging Face Spaces file listing (build status, file size, commit hashes,
# and a run of line numbers) — not Python. Removed so the module parses.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Local directory holding the pretrained model weights and config.
model_name = "model2/"

# Optional 4-bit (nf4) quantization config — currently disabled; uncomment
# here and in from_pretrained() below to enable bitsandbytes quantization.
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=bnb_config,
    trust_remote_code=True,
)
# Disable the KV cache on the config (commonly done when the model is being
# prepared for fine-tuning; generation frameworks re-enable it as needed).
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# The model ships no dedicated pad token; reuse EOS so padding works.
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline; max_length bounds prompt + generated tokens combined.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
def run_inference(prompt):
    """Generate a completion for *prompt* via the module-level `pipe`.

    The prompt is wrapped in the Llama-2-style ``<s>[INST] ... [/INST]``
    chat template before generation.

    Args:
        prompt: User text to send to the model.

    Returns:
        The generated text string. NOTE(review): by default the pipeline's
        ``generated_text`` includes the wrapped prompt as a prefix, not just
        the continuation — confirm this is the intended contract for callers.
    """
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    return result[0]['generated_text']