Spaces:
Sleeping
Sleeping
Commit: new gguf format
Browse files
Changed files:
- model.py    +14 -19
- settings.py  +1 -1
model.py
CHANGED
--- model.py
+++ model.py
@@ -56,26 +56,21 @@ def run(message: str,
         top_k: int = 49,
         repeat_penalty: float = 1.0) -> Iterator[str]:
     global llm
+    llm.reset()
 
     prompt = get_prompt(message, chat_history, system_prompt)
 
-    stop=["</s>"]
-
+    #stop=["</s>"]
+    stop = ["USER:", "ASSISTANT:"]
     outputs = []
-    [old lines 65-75: removed generation/try block — content lost in page extraction]
-    except Exception as e:
-        print(f"Error: {e}")
-        yield "Error in llm, reinitialising llm..."
-        del llm
-        llm = load_llm()
-        yield "llm reinitialised."
+    for text in llm(prompt,
+                    max_tokens=max_new_tokens,
+                    stop=stop,
+                    temperature=temperature,
+                    top_p=top_p,
+                    top_k=0,
+                    repeat_penalty=repeat_penalty,
+                    mirostat_mode=2, mirostat_tau=8.0, mirostat_eta=0.2,
+                    stream=True):
+        outputs.append(text['choices'][0]['text'])
+        yield ''.join(outputs)
|
settings.py
CHANGED
--- settings.py
+++ settings.py
@@ -18,7 +18,7 @@ LLAMA_VERBOSE=False
 
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = [original value lost in page extraction]
+MAX_INPUT_TOKEN_LENGTH = 3072
 
 if IS_LOCAL:
     from settings_local import *