Daniel Marques committed
Commit 66a4e8f
1 Parent(s): b606edb

feat: add stream

Files changed (2)
  1. main.py +2 -2
  2. run_localGPT.py +3 -7
main.py CHANGED
@@ -14,9 +14,9 @@ from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
 
+
 # from langchain.embeddings import HuggingFaceEmbeddings
 from run_localGPT import load_model
-from prompt_template_utils import get_prompt_template
 
 # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.vectorstores import Chroma
@@ -45,7 +45,7 @@ DB = Chroma(
 
 RETRIEVER = DB.as_retriever()
 
-LLM, StreamData = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
+LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=False)
 
 template = """you are a helpful, respectful and honest assistant.
 Your name is Katara llma. You should only use the source documents provided to answer the questions.
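
Net effect in main.py: the module-level LLM is now a single pipeline object rather than an (llm, streamer) tuple, and the API process opts out of stdout streaming. A minimal sketch of the resulting call site, assuming MODEL_ID and MODEL_BASENAME come from the project's constants module and DEVICE_TYPE is derived from torch availability (illustrative, not the exact file contents):

import torch
from constants import MODEL_ID, MODEL_BASENAME
from run_localGPT import load_model

DEVICE_TYPE = "cuda" if torch.cuda.is_available() else "cpu"

# stream=False: the HTTP handlers assemble a complete answer per request,
# so token-by-token stdout streaming stays off in the API process.
LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=False)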
run_localGPT.py CHANGED
@@ -10,8 +10,6 @@ from langchain.callbacks.manager import CallbackManager
 
 torch.set_grad_enabled(False)
 
-callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
 from prompt_template_utils import get_prompt_template
 
 from langchain.vectorstores import Chroma
@@ -38,7 +36,7 @@ from constants import (
 
 
 
-def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
+def load_model(device_type, model_id, model_basename=None, LOGGING=logging, stream=False):
     """
     Select a model for text generation using the HuggingFace library.
     If you are running this for the first time, it will download a model for you.
@@ -91,15 +89,13 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
         top_k=40,
         repetition_penalty=1.0,
         generation_config=generation_config,
-        streamer=streamer,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id
+        callback=[StreamingStdOutCallbackHandler()]
     )
 
     local_llm = HuggingFacePipeline(pipeline=pipe)
     logging.info("Local LLM Loaded")
 
-    return (local_llm, streamer)
+    return local_llm
 
 
 def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
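
Net effect in run_localGPT.py: load_model() gains a stream flag, the module-level CallbackManager is removed, and the function returns just the pipeline wrapper instead of a tuple. The committed code passes callback=[StreamingStdOutCallbackHandler()] to pipeline() unconditionally; a hedged sketch of how the new flag could gate streaming instead, using transformers' TextStreamer (an assumption about intent, not the repository's code — load_model_sketch and its generation parameters are illustrative):

from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline


def load_model_sketch(model_id, stream=False):
    # Illustrative stand-in for load_model(); mirrors its shape after this
    # commit but omits device placement and quantized-basename handling.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    # TextStreamer prints tokens to stdout as they are generated; attach it
    # only when the caller asked for streaming (the CLI, not the API server).
    streamer = TextStreamer(tokenizer, skip_prompt=True) if stream else None

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        streamer=streamer,  # generate() treats None as "no streaming"
    )
    return HuggingFacePipeline(pipeline=pipe)

With this shape, the CLI path would call load_model(..., stream=True) to keep token-by-token output, while main.py passes stream=False as shown above.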