Spaces:
Running
Running
john
committed on
Commit
•
65ae873
1
Parent(s):
0b0f7fe
Update app.py
Browse files
app.py
CHANGED
@@ -4,18 +4,25 @@ os.system('CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-
|
|
4 |
import wget
|
5 |
from llama_cpp import Llama
|
6 |
import random
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
filename = wget.download(url)
|
9 |
-
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31))
|
10 |
filename = wget.download(url)
|
11 |
theme = gr.themes.Soft(
|
12 |
primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
|
13 |
neutral_hue="red",
|
14 |
)
|
15 |
-
title = """<h1 align="center">Chat with awesome
|
16 |
with gr.Blocks(theme=theme) as demo:
|
17 |
gr.HTML(title)
|
18 |
-
gr.HTML("This model is awesome for its size! It is only 20th the size of Chatgpt but is
|
19 |
chatbot = gr.Chatbot()
|
20 |
msg = gr.Textbox()
|
21 |
clear = gr.ClearButton([msg, chatbot])
|
@@ -30,9 +37,9 @@ with gr.Blocks(theme=theme) as demo:
|
|
30 |
#token1 = llm.tokenize(b"### Instruction: ")
|
31 |
#token2 = llm.tokenize(instruction.encode())
|
32 |
#token3 = llm2.tokenize(b"USER: ")
|
33 |
-
tokens3 = llm2.tokenize(user_message.encode())
|
34 |
-
token4 = llm2.tokenize(b"\n\n### Response:")
|
35 |
-
tokens =
|
36 |
history[-1][1] = ""
|
37 |
count = 0
|
38 |
output = ""
|
|
|
4 |
import wget
|
5 |
from llama_cpp import Llama
|
6 |
import random
|
7 |
+
|
8 |
+
def get_num_cores():
|
9 |
+
"""Get the number of CPU cores."""
|
10 |
+
return os.cpu_count()
|
11 |
+
|
12 |
+
if __name__ == "__main__":
|
13 |
+
num_cores = get_num_cores()
|
14 |
+
url = 'https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q2_K.bin'
|
15 |
filename = wget.download(url)
|
16 |
+
llm2 = Llama(model_path=filename, seed=random.randint(1, 2**31), lora_path="ggml-adapter-model.bin", n_threads=num_cores)
|
17 |
filename = wget.download(url)
|
18 |
theme = gr.themes.Soft(
|
19 |
primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
|
20 |
neutral_hue="red",
|
21 |
)
|
22 |
+
title = """<h1 align="center">Chat with awesome LLAMA 2 CHAT model!</h1><br>"""
|
23 |
with gr.Blocks(theme=theme) as demo:
|
24 |
gr.HTML(title)
|
25 |
+
gr.HTML("This model is awesome for its size! It is only 20th the size of Chatgpt but is still decent for chatting. However like all models, LLAMA-2-CHAT can hallucinate and provide incorrect information.")
|
26 |
chatbot = gr.Chatbot()
|
27 |
msg = gr.Textbox()
|
28 |
clear = gr.ClearButton([msg, chatbot])
|
|
|
37 |
#token1 = llm.tokenize(b"### Instruction: ")
|
38 |
#token2 = llm.tokenize(instruction.encode())
|
39 |
#token3 = llm2.tokenize(b"USER: ")
|
40 |
+
#tokens3 = llm2.tokenize(user_message.encode())
|
41 |
+
#token4 = llm2.tokenize(b"\n\n### Response:")
|
42 |
+
tokens = llm2.tokenize(user_message.encode())
|
43 |
history[-1][1] = ""
|
44 |
count = 0
|
45 |
output = ""
|