import os

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Script overview: load a PDF, split it into chunks, index the chunks in a
# Chroma vector store, and answer one question about the document through a
# conversational retrieval chain.
#
# NOTE(review): `pdf_file` and `question` are not defined in the visible part
# of this file; they are presumably supplied by surrounding code (e.g. a UI
# callback) -- confirm before running this as a standalone script.

# Access the OpenAI API key from the environment.
open_ai_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(api_key=open_ai_key)

# Prompt handed to the document-combining step of the chain.
# NOTE(review): the text is reproduced exactly as found (single-space
# separated); the original may have had line breaks between the sections --
# confirm against the upstream source.
template = (
    "Use the following pieces of information to answer the user's question. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer. "
    "Context: {context} "
    "Question: {question} "
    "Only return the helpful answer below and nothing else. "
    "Helpful answer: "
)
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Load and process the PDF.
loader = PyPDFLoader(pdf_file.name)
pdf_data = loader.load()

# Split the text into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(pdf_data)

# Create a Chroma vector store from the chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="embaas/sentence-transformers-multilingual-e5-base",
)
db = Chroma.from_documents(docs, embeddings)

# Initialize message history for the conversation.
message_history = ChatMessageHistory()

# Memory for conversational context. `output_key="answer"` names which chain
# output is stored in memory.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",
    chat_memory=message_history,
    return_messages=True,
)

# Create a chain that retrieves from the Chroma vector store and answers with
# the custom prompt.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    memory=memory,
    return_source_documents=False,
    combine_docs_chain_kwargs={"prompt": prompt},
)

# Process the question. `invoke` replaces the deprecated `chain({...})` call
# form (`Chain.__call__`); the input and output dictionaries are unchanged.
res = chain.invoke({"question": question})
answer = res["answer"]