operablepattern committed
Commit
f678f2c
1 Parent(s): 08f0765

Update app.py

Files changed (1)
  1. app.py +1 -1
app.py CHANGED
@@ -16,7 +16,7 @@ pipe = pipeline(
 )
 
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
-llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-v0.1-GGUF", model_file="mistral-7b-v0.1.Q4_K_M.gguf", model_type="mistral", gpu_layers=0)
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Mistral-7B-v0.1-GGUF", model_file="mistral-7b-v0.1.Q4_K_M.gguf", model_type="mistral", gpu_layers=0, hf=True)
 tokenizer = AutoTokenizer.from_pretrained(llm)
 
 llm_pipe = pipeline("text-generation", model=llm, tokenizer=tokenizer)
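Context for the change (not part of the commit itself): in the ctransformers library, passing hf=True to AutoModelForCausalLM.from_pretrained returns a transformers-compatible wrapper around the GGUF model, which is what lets AutoTokenizer.from_pretrained(llm) and the text-generation pipeline below accept it; without hf=True the raw ctransformers object is not usable with the transformers pipeline. A minimal sketch of the resulting usage, assuming the names are imported from ctransformers as in its documented hf=True workflow (the full imports in app.py are not shown in this diff):

from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# hf=True wraps the GGUF model so it behaves like a transformers model
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-v0.1-GGUF",
    model_file="mistral-7b-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,  # 0 = no layers offloaded to GPU, run fully on CPU
    hf=True,       # the flag added in this commit
)

# ctransformers builds a tokenizer from the wrapped model object
tokenizer = AutoTokenizer.from_pretrained(llm)

# The wrapped model and tokenizer can now be used with a standard transformers pipeline
llm_pipe = pipeline("text-generation", model=llm, tokenizer=tokenizer)
print(llm_pipe("The capital of France is", max_new_tokens=32)[0]["generated_text"])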