Daniel Marques committed
Commit: b606edb
Parent(s): 46d132d

feat: add stream

Files changed (2):
  1. main.py +8 -2
  2. run_localGPT.py +1 -6
main.py CHANGED
@@ -45,8 +45,7 @@ DB = Chroma(
 
 RETRIEVER = DB.as_retriever()
 
-LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
-prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True)
+LLM, StreamData = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME)
 
 template = """you are a helpful, respectful and honest assistant.
 Your name is Katara llma. You should only use the source documents provided to answer the questions.
@@ -134,6 +133,13 @@ def run_ingest_route():
             },
         )
 
+
+        generated_text = ""
+        for new_text in StreamData:
+            generated_text += new_text
+            print(generated_text)
+
+
         return {"response": "The training was successfully completed"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}")
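
The new loop drains StreamData, the streamer object that load_model now returns alongside the LLM. A minimal sketch of how such a streamer behaves, assuming it is a transformers TextIteratorStreamer (the model id, prompt, and thread wiring below are illustrative assumptions, not code from this repo); note that this kind of streamer only yields text while a concurrent generate() call is feeding it, which is why generation typically runs in a background thread:

```python
# Hedged sketch: how a transformers TextIteratorStreamer (one plausible type
# for StreamData) yields text. Model id, prompt, and thread wiring are
# illustrative assumptions, not code from this repo.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder model id
model = AutoModelForCausalLM.from_pretrained("gpt2")

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer("Answer the question:", return_tensors="pt")

# generate() blocks until completion, so it runs in a background thread
# while the main thread drains the streamer, mirroring the loop added above.
thread = Thread(
    target=model.generate,
    kwargs={**inputs, "streamer": streamer, "max_new_tokens": 64},
)
thread.start()

generated_text = ""
for new_text in streamer:  # yields decoded text chunks as they arrive
    generated_text += new_text
    print(generated_text)  # prints the cumulative text, as in the commit
thread.join()
```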
run_localGPT.py CHANGED
@@ -99,12 +99,7 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
     local_llm = HuggingFacePipeline(pipeline=pipe)
     logging.info("Local LLM Loaded")
 
-    generated_text = ""
-    for new_text in streamer:
-        generated_text += new_text
-        print(generated_text)
-
-    return local_llm
+    return (local_llm, streamer)
 
 
 def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
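
For context, a hedged sketch of how a load_model that returns (local_llm, streamer) could wire the streamer into the HuggingFace pipeline before wrapping it for LangChain. The real load_model in this repo also handles device placement, quantized checkpoints, and model_basename variants, all omitted here, and the identifiers below are assumptions:

```python
# Hedged sketch of a load_model returning (local_llm, streamer), matching
# the new signature; device handling, quantization, and model_basename
# logic from the real function are omitted.
from langchain.llms import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

def load_model_sketch(model_id="gpt2"):  # placeholder model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    # Every generation run through this pipeline feeds the streamer, so a
    # caller holding the second tuple element can iterate the output live.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        streamer=streamer,
    )
    local_llm = HuggingFacePipeline(pipeline=pipe)
    return (local_llm, streamer)
```

Returning the streamer as a second tuple element keeps the LangChain-facing object unchanged while giving callers (here, main.py) a handle on the token stream; the trade-off is that every call site of load_model must be updated to unpack the tuple.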