update chains
Browse files
Dockerfile
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
-
FROM python:3.11
|
|
|
2 |
|
3 |
RUN adduser --uid 1000 --disabled-password --gecos '' appuser
|
|
|
4 |
USER 1000
|
5 |
|
6 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
|
1 |
+
FROM python:3.11
|
2 |
+
#slim-bookworm
|
3 |
|
4 |
RUN adduser --uid 1000 --disabled-password --gecos '' appuser
|
5 |
+
RUN apt-get update && apt-get install gcc tesseract-ocr -y
|
6 |
USER 1000
|
7 |
|
8 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
langchain-streamlit-demo/app.py
CHANGED
@@ -18,8 +18,7 @@ from langchain.schema.retriever import BaseRetriever
|
|
18 |
from langchain_community.callbacks import StreamlitCallbackHandler
|
19 |
from langsmith.client import Client
|
20 |
from llm_resources import (
|
21 |
-
|
22 |
-
get_doc_agent,
|
23 |
get_llm,
|
24 |
get_runnable,
|
25 |
get_texts_and_multiretriever,
|
@@ -396,15 +395,19 @@ get_llm_args = dict(
|
|
396 |
# },
|
397 |
)
|
398 |
get_llm_args_temp_zero = get_llm_args | {"temperature": 0.0}
|
399 |
-
st.session_state.llm = get_llm(**
|
400 |
|
401 |
# --- Chat History ---
|
402 |
for msg in STMEMORY.messages:
|
403 |
if msg.content and msg.type in ("ai", "assistant", "human", "user"):
|
|
|
|
|
|
|
|
|
404 |
st.chat_message(
|
405 |
msg.type,
|
406 |
avatar="🦜" if msg.type in ("ai", "assistant") else None,
|
407 |
-
).write(
|
408 |
|
409 |
|
410 |
# --- Current Chat ---
|
@@ -423,10 +426,11 @@ if st.session_state.llm:
|
|
423 |
|
424 |
# --- Chat Input ---
|
425 |
prompt = st.chat_input(placeholder="Ask me a question!")
|
426 |
-
if question and question != "--":
|
427 |
prompt = question
|
428 |
-
if
|
429 |
-
st.
|
|
|
430 |
feedback_update = None
|
431 |
feedback = None
|
432 |
|
@@ -441,7 +445,7 @@ if st.session_state.llm:
|
|
441 |
config: Dict[str, Any] = dict(
|
442 |
callbacks=callbacks,
|
443 |
tags=["Streamlit Chat"],
|
444 |
-
verbose=
|
445 |
return_intermediate_steps=False,
|
446 |
)
|
447 |
if st.session_state.provider == "Anthropic":
|
@@ -456,8 +460,7 @@ if st.session_state.llm:
|
|
456 |
# )
|
457 |
|
458 |
full_response: Union[str, None] = None
|
459 |
-
|
460 |
-
# callbacks.append(stream_handler)
|
461 |
message_placeholder = st.empty()
|
462 |
default_tools = [
|
463 |
# DuckDuckGoSearchRun(),
|
@@ -470,7 +473,9 @@ if st.session_state.llm:
|
|
470 |
# search_llm=get_llm(**get_llm_args_temp_zero), # type: ignore
|
471 |
# writer_llm=get_llm(**get_llm_args_temp_zero), # type: ignore
|
472 |
# )
|
473 |
-
st_callback = StreamlitCallbackHandler(
|
|
|
|
|
474 |
callbacks.append(st_callback)
|
475 |
|
476 |
# @tool("web-research-assistant")
|
@@ -513,46 +518,58 @@ if st.session_state.llm:
|
|
513 |
|
514 |
@tool("user-document-chat")
|
515 |
def doc_chain_tool(input_str: str, callbacks: Callbacks = None):
|
516 |
-
"""
|
517 |
-
|
|
|
|
|
518 |
input_str,
|
519 |
config=get_config(callbacks),
|
520 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
521 |
|
522 |
-
|
523 |
-
|
524 |
-
|
|
|
|
|
525 |
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
config=get_config(callbacks),
|
536 |
-
)
|
537 |
|
538 |
-
TOOLS =
|
539 |
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
# st.session_state.llm,
|
544 |
-
# callbacks,
|
545 |
-
# )
|
546 |
-
# else:
|
547 |
-
st.session_state.chain = get_runnable(
|
548 |
-
True, # use_document_chat,
|
549 |
-
document_chat_chain_type,
|
550 |
st.session_state.llm,
|
551 |
-
|
552 |
-
MEMORY,
|
553 |
-
chat_prompt,
|
554 |
-
prompt,
|
555 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
556 |
|
557 |
# --- LLM call ---
|
558 |
try:
|
|
|
18 |
from langchain_community.callbacks import StreamlitCallbackHandler
|
19 |
from langsmith.client import Client
|
20 |
from llm_resources import (
|
21 |
+
get_agent,
|
|
|
22 |
get_llm,
|
23 |
get_runnable,
|
24 |
get_texts_and_multiretriever,
|
|
|
395 |
# },
|
396 |
)
|
397 |
get_llm_args_temp_zero = get_llm_args | {"temperature": 0.0}
|
398 |
+
st.session_state.llm = get_llm(**get_llm_args_temp_zero)
|
399 |
|
400 |
# --- Chat History ---
|
401 |
for msg in STMEMORY.messages:
|
402 |
if msg.content and msg.type in ("ai", "assistant", "human", "user"):
|
403 |
+
content = (
|
404 |
+
# msg.content.split("-" * 50)[1] if ("-" * 50) in msg.content else
|
405 |
+
msg.content
|
406 |
+
)
|
407 |
st.chat_message(
|
408 |
msg.type,
|
409 |
avatar="🦜" if msg.type in ("ai", "assistant") else None,
|
410 |
+
).write(content)
|
411 |
|
412 |
|
413 |
# --- Current Chat ---
|
|
|
426 |
|
427 |
# --- Chat Input ---
|
428 |
prompt = st.chat_input(placeholder="Ask me a question!")
|
429 |
+
if question and question != "--" and not prompt:
|
430 |
prompt = question
|
431 |
+
if not uploaded_file:
|
432 |
+
st.error("Please upload a PDF to use the document chat feature.")
|
433 |
+
elif prompt:
|
434 |
feedback_update = None
|
435 |
feedback = None
|
436 |
|
|
|
445 |
config: Dict[str, Any] = dict(
|
446 |
callbacks=callbacks,
|
447 |
tags=["Streamlit Chat"],
|
448 |
+
verbose=False,
|
449 |
return_intermediate_steps=False,
|
450 |
)
|
451 |
if st.session_state.provider == "Anthropic":
|
|
|
460 |
# )
|
461 |
|
462 |
full_response: Union[str, None] = None
|
463 |
+
|
|
|
464 |
message_placeholder = st.empty()
|
465 |
default_tools = [
|
466 |
# DuckDuckGoSearchRun(),
|
|
|
473 |
# search_llm=get_llm(**get_llm_args_temp_zero), # type: ignore
|
474 |
# writer_llm=get_llm(**get_llm_args_temp_zero), # type: ignore
|
475 |
# )
|
476 |
+
st_callback = StreamlitCallbackHandler(
|
477 |
+
st.container(), expand_new_thoughts=False
|
478 |
+
)
|
479 |
callbacks.append(st_callback)
|
480 |
|
481 |
# @tool("web-research-assistant")
|
|
|
518 |
|
519 |
@tool("user-document-chat")
|
520 |
def doc_chain_tool(input_str: str, callbacks: Callbacks = None):
|
521 |
+
"""Usa sempre questo strumento almeno una volta. L'input dovrebbe essere una domanda."""
|
522 |
+
|
523 |
+
# """Always use this tool at least once. Input should be a question."""
|
524 |
+
response = st.session_state.doc_chain.invoke(
|
525 |
input_str,
|
526 |
config=get_config(callbacks),
|
527 |
)
|
528 |
+
with st.sidebar.expander("Sources"):
|
529 |
+
for source in response["source_documents"][:3]:
|
530 |
+
st.markdown("-" * 50)
|
531 |
+
st.markdown(source.page_content)
|
532 |
+
return response["output_text"]
|
533 |
+
|
534 |
+
# doc_chain_agent = get_doc_agent(
|
535 |
+
# [doc_chain_tool],
|
536 |
+
# st.session_state.llm,
|
537 |
+
# )
|
538 |
|
539 |
+
# @tool("document-question-tool")
|
540 |
+
# def doc_question_tool(input_str: str, callbacks: Callbacks = None):
|
541 |
+
# """Questo strumento è un assistente AI con accesso al documento caricato dall'utente.
|
542 |
+
# L'input dovrebbe essere una o più domande, richieste, istruzioni, ecc.
|
543 |
+
# """
|
544 |
|
545 |
+
# # """This tool is an AI assistant with access to the user's uploaded document.
|
546 |
+
# # Input should be one or more questions, requests, instructions, etc.
|
547 |
+
# # If the user's meaning is unclear, perhaps the answer is here.
|
548 |
+
# # Generally speaking, try this tool before conducting web research.
|
549 |
+
# # """
|
550 |
+
# return doc_chain_agent.invoke(
|
551 |
+
# input_str,
|
552 |
+
# config=get_config(callbacks),
|
553 |
+
# )
|
|
|
|
|
554 |
|
555 |
+
TOOLS = TOOLS + [doc_chain_tool]
|
556 |
|
557 |
+
st.session_state.chain = get_agent(
|
558 |
+
TOOLS,
|
559 |
+
STMEMORY,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
560 |
st.session_state.llm,
|
561 |
+
callbacks,
|
|
|
|
|
|
|
562 |
)
|
563 |
+
# else:
|
564 |
+
# st.session_state.chain = get_runnable(
|
565 |
+
# True, # use_document_chat,
|
566 |
+
# document_chat_chain_type,
|
567 |
+
# st.session_state.llm,
|
568 |
+
# st.session_state.retriever,
|
569 |
+
# MEMORY,
|
570 |
+
# chat_prompt,
|
571 |
+
# prompt,
|
572 |
+
# )
|
573 |
|
574 |
# --- LLM call ---
|
575 |
try:
|
langchain-streamlit-demo/defaults.py
CHANGED
@@ -21,16 +21,19 @@ MODEL_DICT = {
|
|
21 |
|
22 |
SUPPORTED_MODELS = list(MODEL_DICT.keys())
|
23 |
|
24 |
-
DEFAULT_MODEL = os.environ.get(
|
|
|
|
|
25 |
|
26 |
DEFAULT_SYSTEM_PROMPT = os.environ.get(
|
27 |
"DEFAULT_SYSTEM_PROMPT",
|
28 |
# "You are a helpful chatbot. Do not rush. Always plan, think, and act in a step-by-step manner.",
|
29 |
"""
|
30 |
-
Comportati come un operatore di call center.
|
31 |
Hai accesso a tutte le informazioni necessarie per rispondere alle domande dei clienti tramite il tool 'document-question-tool'.
|
32 |
-
|
33 |
-
|
|
|
34 |
""",
|
35 |
)
|
36 |
|
@@ -45,6 +48,20 @@ DEFAULT_MAX_TOKENS = int(os.environ.get("DEFAULT_MAX_TOKENS", 1000))
|
|
45 |
DEFAULT_LANGSMITH_PROJECT = os.environ.get("LANGCHAIN_PROJECT")
|
46 |
|
47 |
TEST_QUESTIONS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
"non ho ricevuto le credenziali di accesso all'area riservata: dove posso trovarle?",
|
49 |
"vorrei informazioni relativamente alle prestazioni presenti nel checkup",
|
50 |
"la risonanza magnetica è coperta dalla polizza?",
|
@@ -72,7 +89,7 @@ TEST_QUESTIONS = [
|
|
72 |
|
73 |
|
74 |
SHOW_LANGSMITH_OPTIONS = (
|
75 |
-
os.environ.get("SHOW_LANGSMITH_OPTIONS", "
|
76 |
)
|
77 |
SHOW_AZURE_OPTIONS = os.environ.get("SHOW_AZURE_OPTIONS", "true").lower() == "true"
|
78 |
|
|
|
21 |
|
22 |
SUPPORTED_MODELS = list(MODEL_DICT.keys())
|
23 |
|
24 |
+
DEFAULT_MODEL = os.environ.get(
|
25 |
+
"DEFAULT_MODEL", "gpt-3.5-turbo"
|
26 |
+
) # "gpt-4-turbo-preview")
|
27 |
|
28 |
DEFAULT_SYSTEM_PROMPT = os.environ.get(
|
29 |
"DEFAULT_SYSTEM_PROMPT",
|
30 |
# "You are a helpful chatbot. Do not rush. Always plan, think, and act in a step-by-step manner.",
|
31 |
"""
|
32 |
+
Comportati come un operatore di call center di Poste Assicurazioni. Ti vengono rivolte domande su polizze, prestazioni, autorizzazioni, rimborsi.
|
33 |
Hai accesso a tutte le informazioni necessarie per rispondere alle domande dei clienti tramite il tool 'user-document-chat'.
|
34 |
+
Usa sempre il tool 'user-document-chat' per trovare le informazioni necessarie a formulare la risposta. Rispondi sempre in maniera specifica e dettagliata rispetto alla polizza di Poste Assicurazioni accessibile tramite tool.
|
35 |
+
Se non sai rispondere ad una domanda, chiedi all'utente le informazioni mancanti oppure rispondi che non hai la risposta e offri di connettere il cliente con un operatore umano. Non consigliare mai di leggere la polizza o di cercare informazioni altrove.
|
36 |
+
Rispondi in italiano, usando uno stile amichevole ma formale, e meno di 150 parole per risposta, a meno che non contenga una lunga lista.
|
37 |
""",
|
38 |
)
|
39 |
|
|
|
48 |
DEFAULT_LANGSMITH_PROJECT = os.environ.get("LANGCHAIN_PROJECT")
|
49 |
|
50 |
TEST_QUESTIONS = [
|
51 |
+
"Non ho ricevuto le credenziali di accesso all'area riservata: come posso ottenerle?",
|
52 |
+
"Quali prestazioni sono presenti nel checkup?",
|
53 |
+
"La risonanza magnetica è coperta dalla polizza?",
|
54 |
+
"Le visite odontoiatriche sono coperte dalla polizza?",
|
55 |
+
"Come posso richiedere il checkup",
|
56 |
+
"Come posso trovare una struttura convenzionata che faccia le risonanze magnetiche?",
|
57 |
+
"Come chiedere autorizzazione per fare una risonanza magnetica?",
|
58 |
+
"Come chiedere un rimborso per una risonanza magnetica?",
|
59 |
+
"Quali prestazioni sono incluse nel pacchetto maternità?",
|
60 |
+
"Come chiedere autorizzazione per prestazioni incluse nel pacchetto maternità?",
|
61 |
+
"Come devo procedere per fare l'estensione della copertura al nucleo familiare?",
|
62 |
+
]
|
63 |
+
|
64 |
+
OLD_TEST_QUESTIONS = [
|
65 |
"non ho ricevuto le credenziali di accesso all'area riservata: dove posso trovarle?",
|
66 |
"vorrei informazioni relativamente alle prestazioni presenti nel checkup",
|
67 |
"la risonanza magnetica è coperta dalla polizza?",
|
|
|
89 |
|
90 |
|
91 |
SHOW_LANGSMITH_OPTIONS = (
|
92 |
+
os.environ.get("SHOW_LANGSMITH_OPTIONS", "false").lower() == "true"
|
93 |
)
|
94 |
SHOW_AZURE_OPTIONS = os.environ.get("SHOW_AZURE_OPTIONS", "true").lower() == "true"
|
95 |
|
langchain-streamlit-demo/llm_resources.py
CHANGED
@@ -2,19 +2,16 @@ import uuid
|
|
2 |
from tempfile import NamedTemporaryFile
|
3 |
from typing import Dict, List, Optional, Tuple
|
4 |
|
5 |
-
from defaults import
|
|
|
|
|
|
|
|
|
|
|
6 |
from langchain.agents import AgentExecutor, AgentType, initialize_agent
|
7 |
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
8 |
from langchain.callbacks.base import BaseCallbackHandler
|
9 |
from langchain.chains import RetrievalQA
|
10 |
-
from langchain.chat_models import (
|
11 |
-
AzureChatOpenAI,
|
12 |
-
ChatAnthropic,
|
13 |
-
ChatAnyscale,
|
14 |
-
ChatOpenAI,
|
15 |
-
)
|
16 |
-
from langchain.document_loaders import PyPDFLoader
|
17 |
-
from langchain.embeddings import OpenAIEmbeddings
|
18 |
from langchain.llms.base import BaseLLM
|
19 |
from langchain.memory import ConversationBufferMemory
|
20 |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
@@ -27,8 +24,15 @@ from langchain.schema.runnable import RunnablePassthrough
|
|
27 |
from langchain.storage import InMemoryStore
|
28 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
29 |
from langchain.tools.base import BaseTool
|
30 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
from langchain_core.messages import SystemMessage
|
|
|
32 |
|
33 |
# from qagen import get_rag_qa_gen_chain
|
34 |
# from summarize import get_rag_summarization_chain
|
@@ -42,11 +46,12 @@ def get_agent(
|
|
42 |
):
|
43 |
memory_key = "agent_history"
|
44 |
system_message = SystemMessage(
|
45 |
-
content=
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
50 |
)
|
51 |
prompt = OpenAIFunctionsAgent.create_prompt(
|
52 |
system_message=system_message,
|
@@ -97,11 +102,17 @@ def get_doc_agent(
|
|
97 |
(
|
98 |
"system",
|
99 |
"""
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
""",
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
),
|
106 |
("user", "{input}"),
|
107 |
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
@@ -157,13 +168,20 @@ def get_runnable(
|
|
157 |
retriever=retriever,
|
158 |
output_key="output_text",
|
159 |
return_source_documents=True,
|
160 |
-
) | (
|
161 |
-
lambda output: output["output_text"]
|
162 |
-
+ "\n\n"
|
163 |
-
+ "Source Documents:"
|
164 |
-
+ "\n"
|
165 |
-
+ "\n".join([x.page_content for x in output["source_documents"][:3]])
|
166 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
|
169 |
def get_llm(
|
@@ -232,7 +250,7 @@ def get_texts_and_multiretriever(
|
|
232 |
temp_file.write(uploaded_file_bytes)
|
233 |
temp_file.seek(0)
|
234 |
|
235 |
-
loader =
|
236 |
documents = loader.load()
|
237 |
text_splitter = RecursiveCharacterTextSplitter(
|
238 |
chunk_size=10000,
|
@@ -252,7 +270,10 @@ def get_texts_and_multiretriever(
|
|
252 |
_text.metadata[id_key] = _id
|
253 |
sub_texts.extend(_sub_texts)
|
254 |
|
255 |
-
embeddings_kwargs = {
|
|
|
|
|
|
|
256 |
# if use_azure and azure_kwargs:
|
257 |
# azure_kwargs["azure_endpoint"] = azure_kwargs.pop("openai_api_base")
|
258 |
# embeddings_kwargs.update(azure_kwargs)
|
|
|
2 |
from tempfile import NamedTemporaryFile
|
3 |
from typing import Dict, List, Optional, Tuple
|
4 |
|
5 |
+
from defaults import (
|
6 |
+
DEFAULT_CHUNK_OVERLAP,
|
7 |
+
DEFAULT_CHUNK_SIZE,
|
8 |
+
DEFAULT_RETRIEVER_K,
|
9 |
+
DEFAULT_SYSTEM_PROMPT,
|
10 |
+
)
|
11 |
from langchain.agents import AgentExecutor, AgentType, initialize_agent
|
12 |
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
13 |
from langchain.callbacks.base import BaseCallbackHandler
|
14 |
from langchain.chains import RetrievalQA
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
from langchain.llms.base import BaseLLM
|
16 |
from langchain.memory import ConversationBufferMemory
|
17 |
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
|
|
24 |
from langchain.storage import InMemoryStore
|
25 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
26 |
from langchain.tools.base import BaseTool
|
27 |
+
from langchain_community.chat_models import (
|
28 |
+
AzureChatOpenAI,
|
29 |
+
ChatAnthropic,
|
30 |
+
ChatAnyscale,
|
31 |
+
)
|
32 |
+
from langchain_community.document_loaders import PyMuPDFLoader
|
33 |
+
from langchain_community.vectorstores.faiss import FAISS
|
34 |
from langchain_core.messages import SystemMessage
|
35 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
36 |
|
37 |
# from qagen import get_rag_qa_gen_chain
|
38 |
# from summarize import get_rag_summarization_chain
|
|
|
46 |
):
|
47 |
memory_key = "agent_history"
|
48 |
system_message = SystemMessage(
|
49 |
+
content=DEFAULT_SYSTEM_PROMPT
|
50 |
+
# (
|
51 |
+
# "Do your best to answer the questions. "
|
52 |
+
# "Feel free to use any tools available to look up "
|
53 |
+
# "relevant information, only if necessary"
|
54 |
+
# ),
|
55 |
)
|
56 |
prompt = OpenAIFunctionsAgent.create_prompt(
|
57 |
system_message=system_message,
|
|
|
102 |
(
|
103 |
"system",
|
104 |
"""
|
105 |
+
Assisti un chatbot a rispondere a domande su un documento di polizza.
|
106 |
+
Se necessario, suddividi le domande in più parti
|
107 |
+
e usa gli strumenti forniti per rispondere a domande più piccole
|
108 |
+
prima di rispondere alla domanda più grande.
|
109 |
""",
|
110 |
+
# """
|
111 |
+
# You assist a chatbot with answering questions about a document.
|
112 |
+
# If necessary, break up incoming questions into multiple parts,
|
113 |
+
# and use the tools provided to answer smaller questions before
|
114 |
+
# answering the larger question.
|
115 |
+
# """,
|
116 |
),
|
117 |
("user", "{input}"),
|
118 |
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
|
|
168 |
retriever=retriever,
|
169 |
output_key="output_text",
|
170 |
return_source_documents=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
)
|
172 |
+
# ) | (
|
173 |
+
# lambda output: output["output_text"]
|
174 |
+
# + "\n\n"
|
175 |
+
# + ("-" * 50)
|
176 |
+
# + "\nSource Documents:"
|
177 |
+
# + "\n"
|
178 |
+
# + "\n".join(
|
179 |
+
# [
|
180 |
+
# f"\nRisorsa {e}:\n" + x.page_content
|
181 |
+
# for e, x in enumerate(output["source_documents"][:3])
|
182 |
+
# ]
|
183 |
+
# )
|
184 |
+
# )
|
185 |
|
186 |
|
187 |
def get_llm(
|
|
|
250 |
temp_file.write(uploaded_file_bytes)
|
251 |
temp_file.seek(0)
|
252 |
|
253 |
+
loader = PyMuPDFLoader(temp_file.name, clip=(0, 0.10 * 842, 595, 0.85 * 842))
|
254 |
documents = loader.load()
|
255 |
text_splitter = RecursiveCharacterTextSplitter(
|
256 |
chunk_size=10000,
|
|
|
270 |
_text.metadata[id_key] = _id
|
271 |
sub_texts.extend(_sub_texts)
|
272 |
|
273 |
+
embeddings_kwargs = {
|
274 |
+
"openai_api_key": openai_api_key,
|
275 |
+
"model": "text-embedding-3-large",
|
276 |
+
}
|
277 |
# if use_azure and azure_kwargs:
|
278 |
# azure_kwargs["azure_endpoint"] = azure_kwargs.pop("openai_api_base")
|
279 |
# embeddings_kwargs.update(azure_kwargs)
|
langchain-streamlit-demo/research_assistant/search/web.py
CHANGED
@@ -6,7 +6,7 @@ from bs4 import BeautifulSoup
|
|
6 |
from langchain.llms.base import BaseLLM
|
7 |
from langchain.prompts import ChatPromptTemplate
|
8 |
from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
|
9 |
-
from
|
10 |
from langchain_core.messages import SystemMessage
|
11 |
from langchain_core.output_parsers import StrOutputParser
|
12 |
from langchain_core.runnables import (
|
|
|
6 |
from langchain.llms.base import BaseLLM
|
7 |
from langchain.prompts import ChatPromptTemplate
|
8 |
from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
|
9 |
+
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
|
10 |
from langchain_core.messages import SystemMessage
|
11 |
from langchain_core.output_parsers import StrOutputParser
|
12 |
from langchain_core.runnables import (
|
requirements.txt
CHANGED
@@ -6,6 +6,7 @@ duckduckgo-search==4.3.1
|
|
6 |
faiss-cpu==1.7.4
|
7 |
langchain==0.1.4
|
8 |
langchain-community==0.0.16
|
|
|
9 |
langsmith==0.0.84
|
10 |
# mypy==1.8.0
|
11 |
numexpr==2.9.0
|
@@ -22,3 +23,4 @@ tiktoken==0.5.2
|
|
22 |
tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
|
23 |
validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability
|
24 |
wikipedia==1.4.0
|
|
|
|
6 |
faiss-cpu==1.7.4
|
7 |
langchain==0.1.4
|
8 |
langchain-community==0.0.16
|
9 |
+
langchain-openai
|
10 |
langsmith==0.0.84
|
11 |
# mypy==1.8.0
|
12 |
numexpr==2.9.0
|
|
|
23 |
tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
|
24 |
validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability
|
25 |
wikipedia==1.4.0
|
26 |
+
pymupdf
|