|  | import streamlit as st | 
					
						
						|  | from dotenv import load_dotenv | 
					
						
						|  | from PyPDF2 import PdfReader | 
					
						
						|  | from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter | 
					
						
						|  | from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings | 
					
						
						|  | from langchain.vectorstores import FAISS, Chroma | 
					
						
						|  | from langchain.embeddings import HuggingFaceEmbeddings | 
					
						
						|  | from langchain.chat_models import ChatOpenAI | 
					
						
						|  | from langchain.memory import ConversationBufferMemory | 
					
						
						|  | from langchain.chains import ConversationalRetrievalChain | 
					
						
						|  | from htmlTemplates import css, bot_template, user_template | 
					
						
						|  | from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers | 
					
						
						|  | from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader | 
					
						
						|  | import tempfile | 
					
						
						|  | import os | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_pdf_text(pdf_docs): | 
					
						
						|  | temp_dir = tempfile.TemporaryDirectory() | 
					
						
						|  | temp_filepath = os.path.join(temp_dir.name, pdf_docs.name) | 
					
						
						|  | with open(temp_filepath, "wb") as f: | 
					
						
						|  | f.write(pdf_docs.getvalue()) | 
					
						
						|  | pdf_loader = PyPDFLoader(temp_filepath) | 
					
						
						|  | pdf_doc = pdf_loader.load() | 
					
						
						|  | return pdf_doc | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_text_file(docs): | 
					
						
						|  | temp_dir = tempfile.TemporaryDirectory() | 
					
						
						|  | temp_path = os.path.join(temp_dir.name, docs.name) | 
					
						
						|  | with open(temp_path, "wb") as f: | 
					
						
						|  | f.write(docs.getvalue()) | 
					
						
						|  | loader = TextLoader(temp_path) | 
					
						
						|  | doc = loader.load() | 
					
						
						|  | return doc | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_csv_file(docs): | 
					
						
						|  | temp_dir = tempfile.TemporaryDirectory() | 
					
						
						|  | temp_path = os.path.join(temp_dir.name, docs.name) | 
					
						
						|  | with open(temp_path, "wb") as f: | 
					
						
						|  | f.write(docs.getvalue()) | 
					
						
						|  | loader = CSVLoader(temp_path) | 
					
						
						|  | doc = loader.load() | 
					
						
						|  | return doc | 
					
						
						|  |  | 
					
						
						|  | def get_json_file(docs): | 
					
						
						|  | temp_dir = tempfile.TemporaryDirectory() | 
					
						
						|  | temp_path = os.path.join(temp_dir.name, docs.name) | 
					
						
						|  | with open(temp_path, "wb") as f: | 
					
						
						|  | f.write(docs.getvalue()) | 
					
						
						|  | loader = JSONLoader(temp_path) | 
					
						
						|  | doc = loader.load() | 
					
						
						|  | return doc | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_text_chunks(documents): | 
					
						
						|  | text_splitter = RecursiveCharacterTextSplitter( | 
					
						
						|  | chunk_size=1000, | 
					
						
						|  | chunk_overlap=200, | 
					
						
						|  | length_function=len | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | documents = text_splitter.split_documents(documents) | 
					
						
						|  | return documents | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_vectorstore(text_chunks): | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | embeddings = OpenAIEmbeddings() | 
					
						
						|  | vectorstore = FAISS.from_documents(text_chunks, embeddings) | 
					
						
						|  |  | 
					
						
						|  | return vectorstore | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_conversation_chain(vectorstore): | 
					
						
						|  | gpt_model_name = 'gpt-3.5-turbo' | 
					
						
						|  | llm = ChatOpenAI(model_name = gpt_model_name) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | memory = ConversationBufferMemory( | 
					
						
						|  | memory_key='chat_history', return_messages=True) | 
					
						
						|  |  | 
					
						
						|  | conversation_chain = ConversationalRetrievalChain.from_llm( | 
					
						
						|  | llm=llm, | 
					
						
						|  | retriever=vectorstore.as_retriever(), | 
					
						
						|  | memory=memory | 
					
						
						|  | ) | 
					
						
						|  | return conversation_chain | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def handle_userinput(user_question): | 
					
						
						|  |  | 
					
						
						|  | response = st.session_state.conversation({'question': user_question}) | 
					
						
						|  |  | 
					
						
						|  | st.session_state.chat_history = response['chat_history'] | 
					
						
						|  |  | 
					
						
						|  | for i, message in enumerate(st.session_state.chat_history): | 
					
						
						|  | if i % 2 == 0: | 
					
						
						|  | st.write(user_template.replace( | 
					
						
						|  | "{{MSG}}", message.content), unsafe_allow_html=True) | 
					
						
						|  | else: | 
					
						
						|  | st.write(bot_template.replace( | 
					
						
						|  | "{{MSG}}", message.content), unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def main(): | 
					
						
						|  | load_dotenv() | 
					
						
						|  | st.set_page_config(page_title="Chat with multiple Files", | 
					
						
						|  | page_icon=":books:") | 
					
						
						|  | st.write(css, unsafe_allow_html=True) | 
					
						
						|  |  | 
					
						
						|  | if "conversation" not in st.session_state: | 
					
						
						|  | st.session_state.conversation = None | 
					
						
						|  | if "chat_history" not in st.session_state: | 
					
						
						|  | st.session_state.chat_history = None | 
					
						
						|  |  | 
					
						
						|  | st.header("Chat with multiple Files :") | 
					
						
						|  | user_question = st.text_input("Ask a question about your documents:") | 
					
						
						|  | if user_question: | 
					
						
						|  | handle_userinput(user_question) | 
					
						
						|  |  | 
					
						
						|  | with st.sidebar: | 
					
						
						|  | openai_key = st.text_input("Paste your OpenAI API key (sk-...)") | 
					
						
						|  | if openai_key: | 
					
						
						|  | os.environ["OPENAI_API_KEY"] = openai_key | 
					
						
						|  |  | 
					
						
						|  | st.subheader("Your documents") | 
					
						
						|  | docs = st.file_uploader( | 
					
						
						|  | "Upload your PDFs here and click on 'Process'", accept_multiple_files=True) | 
					
						
						|  | if st.button("Process"): | 
					
						
						|  | with st.spinner("Processing"): | 
					
						
						|  |  | 
					
						
						|  | doc_list = [] | 
					
						
						|  |  | 
					
						
						|  | for file in docs: | 
					
						
						|  | print('file - type : ', file.type) | 
					
						
						|  | if file.type == 'text/plain': | 
					
						
						|  |  | 
					
						
						|  | doc_list.extend(get_text_file(file)) | 
					
						
						|  | elif file.type in ['application/octet-stream', 'application/pdf']: | 
					
						
						|  |  | 
					
						
						|  | doc_list.extend(get_pdf_text(file)) | 
					
						
						|  | elif file.type == 'text/csv': | 
					
						
						|  |  | 
					
						
						|  | doc_list.extend(get_csv_file(file)) | 
					
						
						|  | elif file.type == 'application/json': | 
					
						
						|  |  | 
					
						
						|  | doc_list.extend(get_json_file(file)) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | text_chunks = get_text_chunks(doc_list) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | vectorstore = get_vectorstore(text_chunks) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.session_state.conversation = get_conversation_chain( | 
					
						
						|  | vectorstore) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if __name__ == '__main__': | 
					
						
						|  | main() | 
					
						
						|  |  |