Update app/app.py
app/app.py  CHANGED  (+3 -3)
@@ -73,8 +73,8 @@ logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=
-        n_threads=
+        n_ctx=2048,
+        n_threads=1,
         n_batch=512,
         use_mlock=True,
         verbose=False
@@ -125,7 +125,7 @@ async def generate_llm_response(prompt: str, request_id: str):
     loop = asyncio.get_running_loop()
     response = await loop.run_in_executor(
         None,
-        lambda: llm(prompt, max_tokens=
+        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
     )
     answer = response["choices"][0]["text"].strip()
     if not answer:
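
For context, here is a minimal sketch of how the two changed regions fit together in app/app.py. It assumes an async app that loads a GGUF model with llama-cpp-python at startup and offloads the blocking completion call to a worker thread; the MODEL_PATH default, the logging setup, and the empty-answer fallback are illustrative assumptions rather than part of this commit.

import asyncio
import logging
import os

from llama_cpp import Llama

logger = logging.getLogger(__name__)
# Hypothetical default path; the actual MODEL_PATH handling is not shown in the diff.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/model.gguf")

logger.info(f"Loading GGUF model from: {MODEL_PATH}")
try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,      # context window set by this commit
        n_threads=1,     # single CPU thread, matching the commit
        n_batch=512,
        use_mlock=True,  # keep model weights locked in RAM
        verbose=False,
    )
except Exception:
    logger.exception("Failed to load GGUF model")
    raise


async def generate_llm_response(prompt: str, request_id: str) -> str:
    # Run the blocking llama.cpp call in the default executor so the
    # asyncio event loop stays responsive while tokens are generated.
    logger.info(f"[{request_id}] generating response")
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        lambda: llm(
            prompt,
            max_tokens=1024,
            stop=["###", "Question:", "Context:", "</s>"],
            temperature=0.05,
            echo=False,
        ),
    )
    answer = response["choices"][0]["text"].strip()
    if not answer:
        # Fallback text is an assumption; the original handling is not visible here.
        answer = "No answer could be generated."
    return answer

The n_ctx=2048 / n_threads=1 settings presumably fit a small CPU-only Space, and routing the call through run_in_executor keeps one long llama.cpp generation from blocking other requests.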