Matt Wallace committed on
Commit
a130843
1 Parent(s): 993a943

trying gguf for cpu inf

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
 
4
# Removed ("-") configuration: the GPTQ 4-bit quantized checkpoint.
# "gptq-4bit-32g-actorder_True" is an actual branch on the
# TheBloke/Llama-2-13B-chat-GPTQ Hub repo, so passing it via `revision=`
# (a git branch/tag/commit selector) was valid for this variant.
- model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
5
  model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
6
  device_map="auto",
7
  trust_remote_code=False,
8
- revision="gptq-4bit-32g-actorder_True")
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
11
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
 
4
# Model source: GGUF-quantized Llama-2 13B chat checkpoint for CPU inference.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
# The specific Q4_K_M quantization file inside that repo.
gguf_filename = "llama-2-13b-chat.Q4_K_M.gguf"

# BUG FIX: `revision=` selects a git branch/tag/commit of the Hub repo; a
# ".gguf" filename is not a revision and cannot resolve. Transformers loads
# GGUF weights (dequantizing them into torch tensors) through the
# `gguf_file=` argument instead.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=False,
    gguf_file=gguf_filename,
)

# GGUF files embed their own tokenizer metadata and TheBloke's GGUF repos do
# not ship separate tokenizer files, so the tokenizer must be loaded from the
# same .gguf file.  NOTE(review): confirm the installed transformers version
# supports GGUF loading (introduced in v4.41).
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    gguf_file=gguf_filename,
    use_fast=True,
)
11