Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
•
98a002e
1
Parent(s):
4395d76
Update forindo branch
Browse files
app.py
CHANGED
@@ -17,8 +17,6 @@ from dl_hf_model import dl_hf_model
|
|
17 |
from loguru import logger
|
18 |
|
19 |
url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
|
20 |
-
if "forindo" in platform.node():
|
21 |
-
url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
|
22 |
|
23 |
# Prompt template: Guanaco
|
24 |
prompt_template = """You are a helpful assistant. Let's think step by step.
|
@@ -36,19 +34,24 @@ logger.debug(f"{cpu_count=}")
|
|
36 |
|
37 |
LLM = None
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
|
|
|
45 |
LLM = AutoModelForCausalLM.from_pretrained(
|
46 |
model_loc,
|
47 |
model_type="llama",
|
48 |
threads=cpu_count,
|
49 |
)
|
50 |
|
51 |
-
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
52 |
|
53 |
os.environ["TZ"] = "Asia/Shanghai"
|
54 |
try:
|
|
|
17 |
from loguru import logger
|
18 |
|
19 |
url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin" # 8.14G
|
|
|
|
|
20 |
|
21 |
# Prompt template: Guanaco
|
22 |
prompt_template = """You are a helpful assistant. Let's think step by step.
|
|
|
34 |
|
35 |
LLM = None
|
36 |
|
37 |
+
if "forindo" in platform.node():
|
38 |
+
# url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin" # 29.7G
|
39 |
+
model_loc = "/home/mu2018/github/langchain-llama-2-70b-guanaco-qlora-ggml/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"
|
40 |
+
else:
|
41 |
+
try:
|
42 |
+
model_loc, file_size = dl_hf_model(url)
|
43 |
+
logger.info(f"done load llm {model_loc=} {file_size=}G")
|
44 |
+
except Exception as exc_:
|
45 |
+
logger.error(exc_)
|
46 |
+
raise SystemExit(1) from exc_
|
47 |
|
48 |
+
logger.debug(f"{model_loc=}")
|
49 |
LLM = AutoModelForCausalLM.from_pretrained(
|
50 |
model_loc,
|
51 |
model_type="llama",
|
52 |
threads=cpu_count,
|
53 |
)
|
54 |
|
|
|
55 |
|
56 |
os.environ["TZ"] = "Asia/Shanghai"
|
57 |
try:
|