Update utills.py
utills.py CHANGED
@@ -1,96 +1,96 @@
 import os
 import sys
 from langchain.text_splitter import TokenTextSplitter,RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 import torch
 from transformers import AutoTokenizer
 from langchain.retrievers.document_compressors import LLMChainExtractor
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import TextLoader
 from langchain.chains import RetrievalQA, ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_core.chat_history import BaseChatMessageHistory
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from typing import Callable, Dict, List, Optional, Union
 from langchain.vectorstores import Chroma
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
 from langchain.document_loaders import PyPDFLoader
 from langchain.document_loaders.pdf import PyPDFDirectoryLoader
 from langchain_community.llms import llamacpp
 
 
 store = {}
-
+@st.cache_resource
 def get_session_history(session_id: str):
     if session_id not in store:
         store[session_id] = ChatMessageHistory()
     return store[session_id]
 
 
 
-
+@st.cache_resource
 def load_documents(data_path):
     try:
         document_loader = PyPDFDirectoryLoader(data_path)
         return document_loader.load()
     except Exception as e:
         print(f"Error loading documents from {data_path}: {e}")
         return None  # or handle the error in an appropriate manner
 
 
-
+@st.cache_resource
 def split_docs(documents, chunk_size, chunk_overlap):
     try:
         text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
             chunk_size=chunk_size, chunk_overlap=chunk_overlap,
             separators=["\n \n \n", "\n \n", "\n1", "(?<=\. )", " ", ""]
         )
         docs = text_splitter.split_documents(documents)
         return docs
     except Exception as e:
         print(f"Error splitting documents: {e}")
         return []  # or handle the error in an appropriate manner
 
-
+@st.cache_resource
 def chroma_db(docs, embeddings):
     try:
         vectordb = Chroma.from_documents(
             documents=docs, embedding=embeddings, persist_directory="docs/chroma/"
         )
         return vectordb
     except Exception as e:
         print(f"Error creating Chroma vector database: {e}")
         return None  # or handle the error in an appropriate manner
 
-
+@st.cache_resource
 def retriever_from_chroma(vectordb, search_type, k):
     retriever = vectordb.as_retriever(search_type=search_type, search_kwargs={"k": k})
     return retriever
 
-
+@st.cache_resource
 def history_aware_retriever(llm, retriever, contextualize_q_system_prompt):
     try:
         contextualize_q_prompt = ChatPromptTemplate.from_messages(
             [
                 ("system", contextualize_q_system_prompt),
                 MessagesPlaceholder("chat_history"),
                 ("human", "{input}"),
             ]
         )
         history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
         return history_aware_retriever
     except Exception as e:
         print(f"Error creating history-aware retriever: {e}")
         return None  # or handle the error in an appropriate manner
 
 
 
-
+@st.cache_resource
 def echo(question, history):
     ai_message = rag_chain.invoke({"input": question, "chat_history": chat_history})
     chat_history.extend([HumanMessage(content=question), ai_message["answer"]])
     return ai_message['answer']
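
Note that every decorator added in this commit references the name st, but utills.py never imports Streamlit, so the module as committed raises a NameError at import time. A minimal fix, assuming the app does run under Streamlit, is one extra line alongside the existing imports:

import streamlit as st  # needed by the @st.cache_resource decorators added in this commit

A caveat on the design choice: st.cache_resource keys its cache on the function's arguments, so parameters such as docs and embeddings must be hashable by Streamlit (or prefixed with an underscore to be excluded from the cache key); otherwise Streamlit cannot build a key for the call and the decorated helpers fail at runtime.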
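echo() also depends on three names that nothing in this file defines or imports: rag_chain, chat_history, and HumanMessage. The commit leaves that unchanged, so the function only works if the surrounding app supplies them at module scope. The sketch below shows one plausible wiring built from the helpers above; the model path, embedding model, prompts, and data directory are illustrative assumptions, not part of the commit:

from langchain_core.messages import HumanMessage  # used by echo() but missing from utills.py

llm = llamacpp.LlamaCpp(model_path="models/mistral-7b.Q4_K_M.gguf")  # hypothetical local model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # assumed

documents = load_documents("data/")  # assumed directory of PDFs
docs = split_docs(documents, chunk_size=1000, chunk_overlap=200)
vectordb = chroma_db(docs, embeddings)
retriever = retriever_from_chroma(vectordb, "mmr", k=4)

ha_retriever = history_aware_retriever(
    llm, retriever,
    "Given the chat history, rewrite the latest question as a standalone question.",
)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer from the retrieved context:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
rag_chain = create_retrieval_chain(ha_retriever, create_stuff_documents_chain(llm, qa_prompt))
chat_history = []  # echo() appends a HumanMessage/answer pair per turn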
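Separately, get_session_history already has the factory signature that RunnableWithMessageHistory (imported at the top of the file) expects, which suggests the intended replacement for the hand-rolled chat_history list. A sketch of that pairing, reusing rag_chain from the sketch above:

conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,  # looks up or creates a ChatMessageHistory per session_id
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
answer = conversational_rag_chain.invoke(
    {"input": "What is this document about?"},
    config={"configurable": {"session_id": "demo-session"}},  # session id is illustrative
)["answer"]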