Lihuchen committed
Commit f2ff742
1 Parent(s): d94406a

Upload 3 files

Files changed (3)
  1. app.py +1 -1
  2. cpu_llama_generate.py +13 -0
  3. llama_generate.py +0 -5
app.py CHANGED
@@ -1,5 +1,5 @@
  import gradio as gr
- from llama_generate import run
+ from cpu_llama_generate import run
  
  
  def greet(query):
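
For orientation, a minimal sketch of app.py after this change. Only the import swap above is part of the commit; the greet body and the gr.Interface wiring are assumptions about the surrounding file, which is not shown in this hunk.

import gradio as gr
from cpu_llama_generate import run


def greet(query):
    # Delegate generation to the CPU-only model loaded in cpu_llama_generate.py
    return run(query)


# Hypothetical wiring; the actual Interface setup lies outside this hunk
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()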
cpu_llama_generate.py ADDED
@@ -0,0 +1,13 @@
+ from ctransformers import AutoModelForCausalLM
+ 
+ # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # the device to load the model onto
+ device = 'cpu'
+ 
+ 
+ # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
+ llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF", model_file="llama-2-7b-chat.Q4_K_M.gguf", model_type="llama", gpu_layers=0)
+ 
+ 
+ def run(query):
+     return llm(query)
+ 
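
A quick usage sketch for the new module, assuming ctransformers' default generation settings. The keyword arguments in the second call (max_new_tokens, temperature) are illustrative ctransformers generation parameters and are not set anywhere in this commit.

from cpu_llama_generate import run, llm

# Call through the helper added in this commit (default generation settings)
print(run("AI is going to"))

# Or call the underlying ctransformers model with explicit generation parameters
print(llm("AI is going to", max_new_tokens=128, temperature=0.7))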
llama_generate.py CHANGED
@@ -11,12 +11,7 @@ model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
  trust_remote_code=False,
  revision="main")
  
- from ctransformers import AutoModelForCausalLM
  
- # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
- llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7b-Chat-GGUF", model_file="llama-2-7b-chat.q4_K_M.gguf", model_type="llama", gpu_layers=50)
- 
- print(llm("AI is going to"))
  
  
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)