Update handler.py
Browse files- handler.py +14 -12
handler.py
CHANGED
@@ -29,16 +29,16 @@ class EndpointHandler():
|
|
29 |
def __init__(self, path=""):
|
30 |
|
31 |
# Config LangChain
|
32 |
-
|
33 |
# os.environ["LANGCHAIN_API_KEY"] =
|
34 |
|
35 |
# Create LLM
|
36 |
-
model_id =
|
37 |
|
38 |
model = AutoModelForCausalLM.from_pretrained(
|
39 |
model_id,
|
40 |
device_map={"": "cuda"},
|
41 |
-
torch_dtype=torch.
|
42 |
load_in_8bit=True
|
43 |
)
|
44 |
model.eval()
|
@@ -66,15 +66,13 @@ class EndpointHandler():
|
|
66 |
|
67 |
# Create Text-Embedding Model
|
68 |
embedding_function = HuggingFaceBgeEmbeddings(
|
69 |
-
model_name="
|
70 |
model_kwargs={'device': 'cuda'},
|
71 |
encode_kwargs={'normalize_embeddings': True}
|
72 |
)
|
73 |
|
74 |
# Load Vector db
|
75 |
urls = [
|
76 |
-
"https://hk.on.cc/hk/bkn/cnt/news/20221019/bkn-20221019040039334-1019_00822_001.html",
|
77 |
-
"https://www.hk01.com/%E7%A4%BE%E6%9C%83%E6%96%B0%E8%81%9E/822848/%E5%89%B5%E7%A7%91%E7%B2%BE%E8%8B%B1-%E5%87%BA%E6%88%B02022%E4%B8%96%E7%95%8C%E6%8A%80%E8%83%BD%E5%A4%A7%E8%B3%BD%E7%89%B9%E5%88%A5%E8%B3%BD",
|
78 |
"https://www.wenweipo.com/epaper/view/newsDetail/1582436861224292352.html",
|
79 |
"https://www.thinkhk.com/article/2023-03/24/59874.html"
|
80 |
]
|
@@ -87,9 +85,11 @@ class EndpointHandler():
|
|
87 |
|
88 |
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
|
89 |
retriever = vectorstore.as_retriever()
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
93 |
|
94 |
_template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
|
95 |
Chat History:
|
@@ -160,11 +160,13 @@ class EndpointHandler():
|
|
160 |
|
161 |
result = self.final_chain.invoke({"question": inputs})
|
162 |
|
|
|
|
|
163 |
# Note that the memory does not save automatically
|
164 |
# This will be improved in the future
|
165 |
# For now you need to save it yourself
|
166 |
-
|
167 |
-
self.memory.save_context(inputs, {"answer": result["answer"].content})
|
168 |
self.memory.load_memory_variables({})
|
169 |
|
170 |
-
return
|
|
|
|
29 |
def __init__(self, path=""):
|
30 |
|
31 |
# Config LangChain
|
32 |
+
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
33 |
# os.environ["LANGCHAIN_API_KEY"] =
|
34 |
|
35 |
# Create LLM
|
36 |
+
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
|
37 |
|
38 |
model = AutoModelForCausalLM.from_pretrained(
|
39 |
model_id,
|
40 |
device_map={"": "cuda"},
|
41 |
+
torch_dtype=torch.float16,
|
42 |
load_in_8bit=True
|
43 |
)
|
44 |
model.eval()
|
|
|
66 |
|
67 |
# Create Text-Embedding Model
|
68 |
embedding_function = HuggingFaceBgeEmbeddings(
|
69 |
+
model_name="DMetaSoul/Dmeta-embedding",
|
70 |
model_kwargs={'device': 'cuda'},
|
71 |
encode_kwargs={'normalize_embeddings': True}
|
72 |
)
|
73 |
|
74 |
# Load Vector db
|
75 |
urls = [
|
|
|
|
|
76 |
"https://www.wenweipo.com/epaper/view/newsDetail/1582436861224292352.html",
|
77 |
"https://www.thinkhk.com/article/2023-03/24/59874.html"
|
78 |
]
|
|
|
85 |
|
86 |
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
|
87 |
retriever = vectorstore.as_retriever()
|
88 |
+
|
89 |
+
compressor = LLMChainExtractor.from_llm(chat)
|
90 |
+
compression_retriever = ContextualCompressionRetriever(
|
91 |
+
base_compressor=compressor, base_retriever=retriever
|
92 |
+
)
|
93 |
|
94 |
_template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
|
95 |
Chat History:
|
|
|
160 |
|
161 |
result = self.final_chain.invoke({"question": inputs})
|
162 |
|
163 |
+
answer = result['answer']
|
164 |
+
|
165 |
# Note that the memory does not save automatically
|
166 |
# This will be improved in the future
|
167 |
# For now you need to save it yourself
|
168 |
+
# self.memory.save_context(inputs, {"answer": answer})
|
|
|
169 |
self.memory.load_memory_variables({})
|
170 |
|
171 |
+
return answer
|
172 |
+
|