import os

from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
# Access the OpenAI API key from the environment
open_ai_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(api_key=open_ai_key)
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])
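# Illustration only (not part of the original script): PromptTemplate.format
# fills in the {context} and {question} placeholders, which is what the chain
# does internally on every call. The strings below are made-up examples.
print(prompt.format(
    context="LangChain is a framework for building LLM applications.",
    question="What is LangChain?",
))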
# Load and process the PDF
# (pdf_file is assumed to be an uploaded file object, e.g. from a Gradio file
# widget, whose .name attribute holds its path on disk)
loader = PyPDFLoader(pdf_file.name)
pdf_data = loader.load()
# Split the text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(pdf_data)
# Create a Chroma vector store
embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
db = Chroma.from_documents(docs, embeddings)
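# Optional sanity check (not part of the original code): query the vector
# store directly to confirm relevant chunks come back before wiring up the
# chain. The query string is a hypothetical example.
for doc in db.similarity_search("What is this document about?", k=2):
    print(doc.page_content[:200])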
# Initialize message history for conversation
message_history = ChatMessageHistory()

# Memory for conversational context
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",
    chat_memory=message_history,
    return_messages=True,
)
# Create a chain that uses the Chroma vector store
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    memory=memory,
    return_source_documents=False,
    combine_docs_chain_kwargs={"prompt": prompt},
)
# Process the question with invoke (calling the chain object directly is
# deprecated); question is assumed to be the user's query string from the UI
res = chain.invoke({"question": question})
answer = res["answer"]
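# Because the chain shares ConversationBufferMemory, a follow-up question can
# refer back to the previous turn. The follow-up below is a made-up example.
followup = chain.invoke({"question": "Can you summarize that answer in one sentence?"})
print(followup["answer"])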