Update app.py
app.py CHANGED
@@ -1,3 +1,5 @@
+!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
+
 from threading import Thread
 from llama_cpp import Llama
 import torch
@@ -9,13 +11,13 @@ print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
 
 llm = Llama(model_path = 'Llama-2-ko-7B-chat-gguf-q4_0.bin',
-            n_ctx=
+            n_ctx=50,
             n_threads = 8,
-            n_batch =
+            n_batch = 5
             )
 
 def gen(x, max_new_tokens):
-    output = llm(f"
+    output = llm(f"### 명령어:\n{x}\n\n### 응답:", max_tokens=max_new_tokens, stop=["###", "\n", ":"], echo=True)
 
     return output['choices'][0]['text'].replace('▁',' ')
 
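For reference, a minimal usage sketch of the updated gen() helper, assuming llama-cpp-python is installed and Llama-2-ko-7B-chat-gguf-q4_0.bin sits in the working directory; the example question and the 32-token budget are illustrative assumptions, not part of the commit.

from llama_cpp import Llama

# Same constructor arguments as in the diff; n_ctx=50 is a very small context
# window, so the prompt plus the completion must fit within 50 tokens.
llm = Llama(model_path='Llama-2-ko-7B-chat-gguf-q4_0.bin',
            n_ctx=50,
            n_threads=8,
            n_batch=5)

def gen(x, max_new_tokens):
    # Korean Alpaca-style template: "### 명령어:" = "### Instruction:",
    # "### 응답:" = "### Response:". Generation stops at "###", a newline, or ":".
    output = llm(f"### 명령어:\n{x}\n\n### 응답:",
                 max_tokens=max_new_tokens,
                 stop=["###", "\n", ":"],
                 echo=True)
    # '▁' is the SentencePiece word-boundary marker; swap it for a plain space.
    return output['choices'][0]['text'].replace('▁', ' ')

# Illustrative question ("What is the capital of Korea?").
print(gen("한국의 수도는 어디인가요?", max_new_tokens=32))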