import os
import sys
from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
import torch
from transformers import AutoTokenizer
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from typing import Callable, Dict, List, Optional, Union
from langchain.vectorstores import Chroma
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_community.llms import llamacpp
import streamlit as st
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    # Return the chat history for this session, creating a fresh one on first use.
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]
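
# A minimal sketch (assumed usage, not in the original file): get_session_history
# is the factory that RunnableWithMessageHistory (imported above) expects, so a
# finished RAG chain can be made session-aware like this. The key names match the
# {input} and chat_history placeholders used by the prompts further down.
def wrap_with_history(chain):
    return RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )
# Usage:
#   wrap_with_history(rag_chain).invoke(
#       {"input": "Hello"}, config={"configurable": {"session_id": "abc"}})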

def load_pdf_documents(data_path):
    # Load every PDF under data_path as a list of Documents.
    try:
        document_loader = PyPDFDirectoryLoader(data_path)
        return document_loader.load()
    except Exception as e:
        print(f"Error loading documents from {data_path}: {e}")
        return None  # or handle the error as appropriate

def load_txt_documents(data_path):
    # Load every .txt file under data_path; warn and skip files that fail to load.
    documents = []
    for filename in os.listdir(data_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(data_path, filename)
            try:
                documents.extend(TextLoader(file_path).load())
            except Exception as e:
                st.warning(f"Error loading file '{filename}': {e}")
    return documents

def split_docs(documents, chunk_size, chunk_overlap):
    # Split documents into token-counted chunks. The lookbehind separator
    # r"(?<=\. )" only takes effect when separators are treated as regexes,
    # hence is_separator_regex=True.
    try:
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n \n \n", "\n \n", "\n1", r"(?<=\. )", " ", ""],
            is_separator_regex=True,
        )
        return text_splitter.split_documents(documents)
    except Exception as e:
        print(f"Error splitting documents: {e}")
        return []  # or handle the error as appropriate
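
# Example wiring under assumed values (the paths and chunk sizes are hypothetical):
#
#   raw_docs = load_pdf_documents("data/") or []
#   raw_docs += load_txt_documents("data/")
#   chunks = split_docs(raw_docs, chunk_size=512, chunk_overlap=64)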

def load_uploaded_documents(uploaded_files):
    # Read Streamlit UploadedFile objects as UTF-8 text.
    documents = []
    for uploaded_file in uploaded_files:
        content = uploaded_file.read().decode("utf-8")
        documents.append({"content": content, "filename": uploaded_file.name})
    return documents
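
# Example Streamlit wiring (the widget label is an assumption):
#
#   uploaded = st.file_uploader("Upload .txt files", type="txt",
#                               accept_multiple_files=True)
#   if uploaded:
#       texts = load_uploaded_documents(uploaded)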

def retriever_from_chroma(docs, embeddings, search_type, k):
    # Embed the chunks into a persistent Chroma store and expose it as a retriever.
    vectordb = Chroma.from_documents(
        documents=docs, embedding=embeddings, persist_directory="docs/chroma/"
    )
    return vectordb.as_retriever(search_type=search_type, search_kwargs={"k": k})
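
# Example retriever construction (the embedding model name is an assumption; any
# model supported by HuggingFaceEmbeddings would work):
#
#   embeddings = HuggingFaceEmbeddings(
#       model_name="sentence-transformers/all-MiniLM-L6-v2")
#   retriever = retriever_from_chroma(chunks, embeddings,
#                                     search_type="similarity", k=4)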

def history_aware_retriever(llm, retriever, contextualize_q_system_prompt):
    # Wrap the retriever so follow-up questions are first rewritten into
    # standalone queries using the chat history.
    try:
        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        return create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
    except Exception as e:
        print(f"Error creating history-aware retriever: {e}")
        return None  # or handle the error as appropriate
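
# A minimal sketch (not part of the original module) of how the rag_chain used by
# echo() below could be assembled from the imports at the top of this file:
# create_stuff_documents_chain stuffs retrieved documents into the prompt, and
# create_retrieval_chain ties it to the retriever. The function name is an
# assumption; qa_system_prompt must contain a {context} placeholder.
def create_rag_chain(llm, retriever, qa_system_prompt):
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
    # The resulting chain takes {"input", "chat_history"} and returns a dict
    # containing "answer" and "context".
    return create_retrieval_chain(retriever, question_answer_chain)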

def echo(question, history):
    # NOTE: rag_chain and chat_history are module-level globals assumed to be
    # initialised elsewhere (e.g. via the create_rag_chain sketch above).
    ai_message = rag_chain.invoke({"input": question, "chat_history": chat_history})
    chat_history.extend(
        [HumanMessage(content=question), AIMessage(content=ai_message["answer"])]
    )
    return ai_message["answer"]