Update handler.py
Browse files
- handler.py +3 -12
handler.py
CHANGED
@@ -33,15 +33,6 @@ class EndpointHandler():
|
|
33 |
# os.environ["LANGCHAIN_API_KEY"] =
|
34 |
|
35 |
# Create LLM
|
36 |
-
|
37 |
-
# load the tokenizer and the quantized mistral model
|
38 |
-
# chat = HuggingFacePipeline.from_model_id(
|
39 |
-
# model_id=path,
|
40 |
-
# task="text-generation",
|
41 |
-
# device=0,
|
42 |
-
# pipeline_kwargs={"max_new_tokens": 1024},
|
43 |
-
# )
|
44 |
-
|
45 |
model_id = path
|
46 |
|
47 |
tokenizer = AutoTokenizer.from_pretrained(
|
@@ -157,7 +148,7 @@ class EndpointHandler():
|
|
157 |
# This will be improved in the future
|
158 |
# For now you need to save it yourself
|
159 |
|
160 |
-
self.memory.save_context(inputs, {"answer": result["answer"].content})
|
161 |
-
self.memory.load_memory_variables({})
|
162 |
|
163 |
-
return result
|
|
|
33 |
# os.environ["LANGCHAIN_API_KEY"] =
|
34 |
|
35 |
# Create LLM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
model_id = path
|
37 |
|
38 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
148 |
# This will be improved in the future
|
149 |
# For now you need to save it yourself
|
150 |
|
151 |
+
# self.memory.save_context(inputs, {"answer": result["answer"].content})
|
152 |
+
# self.memory.load_memory_variables({})
|
153 |
|
154 |
+
return result
|