Daniel Marques committed
Commit 99f6cbc
1 Parent(s): ef75206

fix: add websocket in handlerToken

Files changed (1):
  1. main.py +8 -6
main.py CHANGED
@@ -2,7 +2,7 @@ import os
 import glob
 import shutil
 import subprocess
-import sys
+import asyncio
 
 from typing import Any, Dict, List
 
@@ -17,7 +17,6 @@ from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
 from langchain.callbacks.base import BaseCallbackHandler
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 
 from langchain.schema import LLMResult
@@ -59,7 +58,7 @@ DB = Chroma(
 
 RETRIEVER = DB.as_retriever()
 
-class MyCustomSyncHandler(StreamingStdOutCallbackHandler):
+class MyCustomSyncHandler(BaseCallbackHandler):
     def __init__(self):
         self.end = False
         self.websocket = None
@@ -73,8 +72,10 @@ class MyCustomSyncHandler(StreamingStdOutCallbackHandler):
         self.end = True
 
     def on_llm_new_token(self, token: str, **kwargs) -> Any:
+        print(token)
+
         if self.websocket != None:
-            self.websocket.send_text(token)
+            asyncio.run(self.websocket.send_text(token))
 
         print(token)
 
@@ -82,7 +83,7 @@ class MyCustomSyncHandler(StreamingStdOutCallbackHandler):
 
 handlerToken = MyCustomSyncHandler()
 
-LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=True, callbacks=[])
+LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=True, callbacks=[handlerToken])
 
 template = """You are a helpful, respectful and honest assistant.
 Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
@@ -101,7 +102,6 @@ QA = RetrievalQA.from_chain_type(
     return_source_documents=SHOW_SOURCES,
     chain_type_kwargs={
         "prompt": QA_CHAIN_PROMPT,
-        "callbacks": [handlerToken]
     },
 )
 
@@ -260,6 +260,8 @@ async def websocket_endpoint(websocket: WebSocket):
     while True:
         handlerToken.websocket = websocket
 
+        print(handlerToken.websocket)
+
         data = await websocket.receive_text()
         res = QA(data)
         print(res)
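
For context, a minimal, self-contained sketch of the pattern this commit moves toward: a synchronous callback handler that forwards each generated token to a connected FastAPI WebSocket. The names WebSocketTokenHandler and fake_chain are hypothetical stand-ins (fake_chain replaces the repository's QA chain so the sketch runs on its own), and the sketch uses asyncio.run_coroutine_threadsafe plus asyncio.to_thread rather than asyncio.run, since asyncio.run cannot be called from a thread whose event loop is already running.

# Sketch only: forwards streamed tokens from a synchronous LangChain callback
# to a FastAPI WebSocket. fake_chain is a hypothetical stand-in for QA(...).
import asyncio
import time
from typing import Any, Optional

from fastapi import FastAPI, WebSocket
from langchain.callbacks.base import BaseCallbackHandler

app = FastAPI()


class WebSocketTokenHandler(BaseCallbackHandler):
    """Pushes each new LLM token to the client over an active WebSocket."""

    def __init__(self) -> None:
        self.websocket: Optional[WebSocket] = None
        self.loop: Optional[asyncio.AbstractEventLoop] = None

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        print(token, end="", flush=True)
        if self.websocket is not None and self.loop is not None:
            # Schedule the async send on the server's event loop from this
            # worker thread; the returned future is ignored for brevity.
            asyncio.run_coroutine_threadsafe(self.websocket.send_text(token), self.loop)


handler = WebSocketTokenHandler()


def fake_chain(query: str) -> str:
    """Hypothetical stand-in for the QA chain: emits tokens through the handler."""
    answer = f"Echo: {query}"
    for token in answer.split():
        handler.on_llm_new_token(token + " ")
        time.sleep(0.05)  # simulate generation latency
    return answer


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket) -> None:
    await websocket.accept()
    handler.websocket = websocket
    handler.loop = asyncio.get_running_loop()
    while True:
        data = await websocket.receive_text()
        # Run the blocking chain in a worker thread so the event loop stays
        # free to deliver the token sends scheduled by the handler.
        result = await asyncio.to_thread(fake_chain, data)
        await websocket.send_text(f"[done] {result}")

With the chain dispatched to a worker thread, the endpoint's event loop can flush each send_text call as tokens arrive, which is the token-streaming behaviour the handlerToken change is aiming for.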