lchakkei committed on
Commit
fb88183
1 Parent(s): d40c010

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +14 -12
handler.py CHANGED
@@ -29,16 +29,16 @@ class EndpointHandler():
29
  def __init__(self, path=""):
30
 
31
  # Config LangChain
32
- # os.environ["LANGCHAIN_TRACING_V2"] = "true"
33
  # os.environ["LANGCHAIN_API_KEY"] =
34
 
35
  # Create LLM
36
- model_id = path
37
 
38
  model = AutoModelForCausalLM.from_pretrained(
39
  model_id,
40
  device_map={"": "cuda"},
41
- torch_dtype=torch.bfloat16,
42
  load_in_8bit=True
43
  )
44
  model.eval()
@@ -66,15 +66,13 @@ class EndpointHandler():
66
 
67
  # Create Text-Embedding Model
68
  embedding_function = HuggingFaceBgeEmbeddings(
69
- model_name="BAAI/bge-large-zh",
70
  model_kwargs={'device': 'cuda'},
71
  encode_kwargs={'normalize_embeddings': True}
72
  )
73
 
74
  # Load Vector db
75
  urls = [
76
- "https://hk.on.cc/hk/bkn/cnt/news/20221019/bkn-20221019040039334-1019_00822_001.html",
77
- "https://www.hk01.com/%E7%A4%BE%E6%9C%83%E6%96%B0%E8%81%9E/822848/%E5%89%B5%E7%A7%91%E7%B2%BE%E8%8B%B1-%E5%87%BA%E6%88%B02022%E4%B8%96%E7%95%8C%E6%8A%80%E8%83%BD%E5%A4%A7%E8%B3%BD%E7%89%B9%E5%88%A5%E8%B3%BD",
78
  "https://www.wenweipo.com/epaper/view/newsDetail/1582436861224292352.html",
79
  "https://www.thinkhk.com/article/2023-03/24/59874.html"
80
  ]
@@ -87,9 +85,11 @@ class EndpointHandler():
87
 
88
  vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
89
  retriever = vectorstore.as_retriever()
90
-
91
- # compressor = LLMChainExtractor.from_llm(chat)
92
- # retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
 
 
93
 
94
  _template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
95
  Chat History:
@@ -160,11 +160,13 @@ class EndpointHandler():
160
 
161
  result = self.final_chain.invoke({"question": inputs})
162
 
 
 
163
  # Note that the memory does not save automatically
164
  # This will be improved in the future
165
  # For now you need to save it yourself
166
-
167
- self.memory.save_context(inputs, {"answer": result["answer"].content})
168
  self.memory.load_memory_variables({})
169
 
170
- return result
 
 
29
  def __init__(self, path=""):
30
 
31
  # Config LangChain
32
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
33
  # os.environ["LANGCHAIN_API_KEY"] =
34
 
35
  # Create LLM
36
+ model_id = "mistralai/Mistral-7B-Instruct-v0.1"
37
 
38
  model = AutoModelForCausalLM.from_pretrained(
39
  model_id,
40
  device_map={"": "cuda"},
41
+ torch_dtype=torch.float16,
42
  load_in_8bit=True
43
  )
44
  model.eval()
 
66
 
67
  # Create Text-Embedding Model
68
  embedding_function = HuggingFaceBgeEmbeddings(
69
+ model_name="DMetaSoul/Dmeta-embedding",
70
  model_kwargs={'device': 'cuda'},
71
  encode_kwargs={'normalize_embeddings': True}
72
  )
73
 
74
  # Load Vector db
75
  urls = [
 
 
76
  "https://www.wenweipo.com/epaper/view/newsDetail/1582436861224292352.html",
77
  "https://www.thinkhk.com/article/2023-03/24/59874.html"
78
  ]
 
85
 
86
  vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
87
  retriever = vectorstore.as_retriever()
88
+
89
+ compressor = LLMChainExtractor.from_llm(chat)
90
+ compression_retriever = ContextualCompressionRetriever(
91
+ base_compressor=compressor, base_retriever=retriever
92
+ )
93
 
94
  _template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
95
  Chat History:
 
160
 
161
  result = self.final_chain.invoke({"question": inputs})
162
 
163
+ answer = result['answer']
164
+
165
  # Note that the memory does not save automatically
166
  # This will be improved in the future
167
  # For now you need to save it yourself
168
+ # self.memory.save_context(inputs, {"answer": answer})
 
169
  self.memory.load_memory_variables({})
170
 
171
+ return answer
172
+