import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Path to the fine-tuned model directory
model_name = "model2/"

# Load the model weights from the local directory
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# Disable the KV cache; this is usually only needed during training and slows down generation
model.config.use_cache = False

# Load the matching tokenizer and reuse the EOS token as the padding token
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
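
# BitsAndBytesConfig is imported above but never used. If the intent was to load the
# model in 4-bit precision, a minimal sketch would look like the following
# (assumed settings, not part of the original snippet):
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     quantization_config=bnb_config,
#     trust_remote_code=True,
# )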

# Build a text-generation pipeline around the loaded model and tokenizer.
# Note that max_length counts the prompt tokens as well as the generated ones.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

def run_inference(prompt):
    # Wrap the prompt in the Llama 2 instruction template and return the full generated text
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    return result[0]['generated_text']
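
# Example call (the prompt below is a hypothetical illustration, not from the original code):
if __name__ == "__main__":
    print(run_inference("What is the capital of France?"))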