llama-cpp-python-cuda-gradio

Runtime error

radames committed on Aug 21, 2023

Commit

ae0a725

1 Parent(s): 88cf25d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import copy
 import time
@@ -8,8 +9,8 @@ from huggingface_hub import hf_hub_download
 llm = Llama(
     model_path=hf_hub_download(
-        repo_id="TheBloke/Llama-2-7B-Chat-GGML",
-        filename="llama-2-7b-chat.ggmlv3.q5_0.bin",
     ),
     n_ctx=2048,
     n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM

+import os
 import gradio as gr
 import copy
 import time
 llm = Llama(
     model_path=hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7B-Chat-GGML"),
+        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.ggmlv3.q5_0.bin"),
     ),
     n_ctx=2048,
     n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM