Update app.py
app.py CHANGED
@@ -15,6 +15,7 @@ from langchain_core.messages import (
 )
 from langchain_huggingface import ChatHuggingFace
 from langchain_core.output_parsers import StrOutputParser
+from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain_huggingface import HuggingFaceEndpoint
 from qdrant_client.http import models as rest
 #from qdrant_client import QdrantClient
@@ -140,7 +141,7 @@ async def chat(query,history,sources,reports,subtype,year):
     question_lst= [query]
     for question in question_lst:
         retriever = vectorstore.as_retriever(
-            search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k":
+            search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
 
         context_retrieved = retriever.invoke(question)
         print(len(context_retrieved))
@@ -184,6 +185,7 @@ async def chat(query,history,sources,reports,subtype,year):
 
     # llama-3_1 endpoint = https://howaqfw0lpap12sg.us-east-1.aws.endpoints.huggingface.cloud
     # llama-3 endpoint = https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud
+    callbacks = [StreamingStdOutCallbackHandler()]
     llm_qa = HuggingFaceEndpoint(
         endpoint_url="https://howaqfw0lpap12sg.us-east-1.aws.endpoints.huggingface.cloud",
         max_new_tokens=512*3,
@@ -191,6 +193,8 @@ async def chat(query,history,sources,reports,subtype,year):
         top_p=0.95,
         typical_p=0.95,
         temperature=0.01,
+        callbacks=callbacks,
+        streaming=True,
         repetition_penalty=1.03,)
 
     # create rag chain
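The retriever change in the second hunk completes a previously truncated call: `similarity_score_threshold` discards chunks scoring below 0.6, `k` caps results at 3, and `filter` narrows the search to matching Qdrant payloads. Below is a minimal sketch of how such a `filter` might be built with the `qdrant_client` models already imported as `rest`; the payload key and value are illustrative assumptions, not taken from this commit, and `vectorstore` and `question` are assumed to exist as in app.py.

from qdrant_client.http import models as rest

# Hypothetical payload filter: the key "metadata.source" and its value are
# placeholders; app.py presumably derives the real filter from the
# sources/reports/subtype/year arguments of chat().
filter = rest.Filter(
    must=[
        rest.FieldCondition(
            key="metadata.source",
            match=rest.MatchValue(value="example_report"),
        )
    ]
)

# `vectorstore` and `question` come from earlier in app.py (assumed here).
# Chunks below a 0.6 similarity score are dropped; at most 3 are returned.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.6, "k": 3, "filter": filter},
)
context_retrieved = retriever.invoke(question)
print(len(context_retrieved))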
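The remaining hunks wire token streaming into the endpoint client: the new import, a `callbacks` list holding a `StreamingStdOutCallbackHandler`, and `callbacks=callbacks, streaming=True` on `HuggingFaceEndpoint`. Assembled from the hunks above, the configured client looks roughly like this:

from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_huggingface import HuggingFaceEndpoint

# The handler's on_llm_new_token hook prints each token to stdout as it
# arrives, instead of the response landing in one block at the end.
callbacks = [StreamingStdOutCallbackHandler()]

llm_qa = HuggingFaceEndpoint(
    endpoint_url="https://howaqfw0lpap12sg.us-east-1.aws.endpoints.huggingface.cloud",
    max_new_tokens=512 * 3,    # 1536-token generation budget
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,          # near-deterministic output
    callbacks=callbacks,
    streaming=True,
    repetition_penalty=1.03,
)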
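The diff stops just before the `# create rag chain` comment, so the chain itself is not shown. As a rough sketch of what the imported `ChatHuggingFace` and `StrOutputParser` could be combined into; the prompt wording and variable names are assumptions, not code from this commit:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import ChatHuggingFace

# Wrap the raw endpoint so it exposes the chat-message interface.
chat_model = ChatHuggingFace(llm=llm_qa)

# Hypothetical prompt; the real template in app.py is outside this diff.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the question using only the provided context."),
    ("human", "Context:\n{context}\n\nQuestion: {question}"),
])

rag_chain = prompt | chat_model | StrOutputParser()

# Usage: join the retrieved chunks and stream the answer to stdout.
answer = rag_chain.invoke({
    "context": "\n\n".join(doc.page_content for doc in context_retrieved),
    "question": question,
})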