Update app/app.py
app/app.py  CHANGED  (+3 -3)
@@ -73,8 +73,8 @@ logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=
-        n_threads=
+        n_ctx=2048,
+        n_threads=1,
         n_batch=512,
         use_mlock=True,
         verbose=False
@@ -125,7 +125,7 @@ async def generate_llm_response(prompt: str, request_id: str):
     loop = asyncio.get_running_loop()
     response = await loop.run_in_executor(
         None,
-        lambda: llm(prompt, max_tokens=
+        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
     )
     answer = response["choices"][0]["text"].strip()
     if not answer:
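
For context, here is a minimal sketch of how the two changed regions fit together in app/app.py. It assumes an async app that loads a GGUF model with llama-cpp-python at startup and offloads the blocking completion call to a worker thread; the MODEL_PATH default, the logging setup, and the empty-answer fallback are illustrative assumptions rather than part of this commit.

import asyncio
import logging
import os

from llama_cpp import Llama

logger = logging.getLogger(__name__)
# Hypothetical default path; the actual MODEL_PATH handling is not shown in the diff.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/model.gguf")

logger.info(f"Loading GGUF model from: {MODEL_PATH}")
try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,      # context window set by this commit
        n_threads=1,     # single CPU thread, matching the commit
        n_batch=512,
        use_mlock=True,  # keep model weights locked in RAM
        verbose=False,
    )
except Exception:
    logger.exception("Failed to load GGUF model")
    raise


async def generate_llm_response(prompt: str, request_id: str) -> str:
    # Run the blocking llama.cpp call in the default executor so the
    # asyncio event loop stays responsive while tokens are generated.
    logger.info(f"[{request_id}] generating response")
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: llm(
            prompt,
            max_tokens=1024,
            stop=["###", "Question:", "Context:", "</s>"],
            temperature=0.05,
            echo=False,
        ),
    )
    answer = response["choices"][0]["text"].strip()
    if not answer:
        # Fallback text is an assumption; the original handling is not visible here.
        answer = "No answer could be generated."
    return answer

The n_ctx=2048 / n_threads=1 settings presumably fit a small CPU-only Space, and routing the call through run_in_executor keeps one long llama.cpp generation from blocking other requests.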