Update app.py
app.py CHANGED

@@ -3,6 +3,7 @@ import copy
 import gradio as gr
 import spaces
 from llama_cpp import Llama
+import llama_cpp.llama_tokenizer
 import os
 from huggingface_hub import hf_hub_download
 
@@ -21,8 +22,9 @@ llm = Llama(
         filename=MODEL_FILE,
     ),
     n_ctx=4096,
-    n_gpu_layers=-1,
-
+    n_gpu_layers=-1,
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(MODEL_ID),
+    verbose=False,
 )
 
 TITLE = "<h1><center>Chatbox</center></h1>"
@@ -61,14 +63,14 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-    output = llm
+    output = llm(
         messages=conversation,
         top_k=top_k,
         top_p=top_p,
         repeat_penalty=penalty,
         max_tokens=max_new_tokens,
         stream=True,
-        temperature=temperature,
+        temperature=temperature,
     )
 
     for out in output:
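For context, the second hunk offloads every layer to the GPU (n_gpu_layers=-1), attaches the source model's Hugging Face tokenizer so chat templating matches the original model, and silences llama.cpp's log output (verbose=False). A minimal standalone sketch of the resulting constructor, assuming MODEL_ID names the tokenizer's source repo and MODEL_FILE the GGUF file name; both are defined earlier in app.py, outside this diff, so the values below are hypothetical placeholders:

import llama_cpp.llama_tokenizer
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hypothetical placeholders; the real values live near the top of app.py.
MODEL_ID = "org/model"
MODEL_FILE = "model.Q4_K_M.gguf"

llm = Llama(
    model_path=hf_hub_download(
        repo_id=MODEL_ID,
        filename=MODEL_FILE,
    ),
    n_ctx=4096,
    n_gpu_layers=-1,  # -1 offloads all layers to the GPU
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(MODEL_ID),
    verbose=False,    # suppress llama.cpp's stderr logging
)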
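One caveat with the third hunk: in llama-cpp-python, calling the Llama object directly (llm(...)) is an alias for create_completion(), which expects a prompt string, so llm(messages=...) would raise a TypeError at runtime. Chat-style message lists normally go through create_chat_completion() instead. A hedged sketch of that path inside stream_chat, assuming conversation is an OpenAI-style list of {"role": ..., "content": ...} dicts and that stream_chat yields partial strings for Gradio to render:

output = llm.create_chat_completion(
    messages=conversation,
    top_k=top_k,
    top_p=top_p,
    repeat_penalty=penalty,
    max_tokens=max_new_tokens,
    stream=True,
    temperature=temperature,
)

buffer = ""
for out in output:
    # With stream=True each chunk carries an incremental delta, not the full text.
    delta = out["choices"][0]["delta"]
    buffer += delta.get("content", "")
    yield buffer  # Gradio re-renders the growing reply on each yield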