Daniel Marques committed
Commit: b606edb
Parent(s): 46d132d

feat: add stream

Files changed (2):
  1. main.py +8 -2
  2. run_localGPT.py +1 -6
main.py CHANGED
@@ -45,8 +45,7 @@ DB = Chroma(
 
 RETRIEVER = DB.as_retriever()
 
-LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
-prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True)
+LLM, StreamData = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
 
 template = """you are a helpful, respectful and honest assistant.
 Your name is Katara llma. You should only use the source documents provided to answer the questions.
@@ -134,6 +133,13 @@ def run_ingest_route():
             },
         )
 
+
+        generated_text = ""
+        for new_text in StreamData:
+            generated_text += new_text
+            print(generated_text)
+
+
         return {"response": "The training was successfully completed"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}")
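
The new loop drains StreamData, the streamer object that load_model now returns alongside the LLM. A minimal sketch of how such a streamer behaves, assuming it is a transformers TextIteratorStreamer (the model id, prompt, and thread wiring below are illustrative assumptions, not code from this repo); note that this kind of streamer only yields text while a concurrent generate() call is feeding it, which is why generation typically runs in a background thread:

```python
# Hedged sketch: how a transformers TextIteratorStreamer (one plausible type
# for StreamData) yields text. Model id, prompt, and thread wiring are
# illustrative assumptions, not code from this repo.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder model id
model = AutoModelForCausalLM.from_pretrained("gpt2")

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer("Answer the question:", return_tensors="pt")

# generate() blocks until completion, so it runs in a background thread
# while the main thread drains the streamer, mirroring the loop added above.
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 64},
)
thread.start()

generated_text = ""
for new_text in streamer:  # yields decoded text chunks as they arrive
    generated_text += new_text
    print(generated_text)  # prints the cumulative text, as in the commit
thread.join()
```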
run_localGPT.py CHANGED
@@ -99,12 +99,7 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
     local_llm = HuggingFacePipeline(pipeline=pipe)
     logging.info("Local LLM Loaded")
 
-    generated_text = ""
-    for new_text in streamer:
-        generated_text += new_text
-        print(generated_text)
-
-    return local_llm
+    return (local_llm, streamer)
 
 
 def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
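
For context, a hedged sketch of how a load_model that returns (local_llm, streamer) could wire the streamer into the HuggingFace pipeline before wrapping it for LangChain. The real load_model in this repo also handles device placement, quantized checkpoints, and model_basename variants, all omitted here, and the identifiers below are assumptions:

```python
# Hedged sketch of a load_model returning (local_llm, streamer), matching
# the new signature; device handling, quantization, and model_basename
# logic from the real function are omitted.
from langchain.llms import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

def load_model_sketch(model_id="gpt2"):  # placeholder model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    # Every generation run through this pipeline feeds the streamer, so a
    # caller holding the second tuple element can iterate the output live.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        streamer=streamer,
    )
    local_llm = HuggingFacePipeline(pipeline=pipe)
    return (local_llm, streamer)
```

Returning the streamer as a second tuple element keeps the LangChain-facing object unchanged while giving callers (here, main.py) a handle on the token stream; the trade-off is that every call site of load_model must be updated to unpack the tuple.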