# --- web file-viewer page residue (not part of the program) ---
# thivav's picture
# init commit
# d498edb
# raw
# history blame contribute delete
# No virus
# 3.89 kB
# import os
import os
import tempfile
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from streamlit_extras.add_vertical_space import add_vertical_space
@st.cache_resource(ttl="1h")
def load_retriever(pdf_files):
    """Build a FAISS-backed retriever from uploaded PDF files.

    Each upload is written to a temporary directory so ``PyPDFLoader`` can
    read it from disk. The loaded pages are split into token-sized chunks,
    embedded with ``OpenAIEmbeddings`` and indexed in a FAISS vector store.
    The returned object is cached by Streamlit (``ttl="1h"``).

    Args:
        pdf_files: Iterable of uploaded-file objects exposing ``name`` and
            ``getvalue()`` (the objects passed in from ``st.file_uploader``).

    Returns:
        A retriever using ``similarity_score_threshold`` search with a
        score threshold of 0.5 and ``k=5``.
    """
    docs = []

    # Use the directory as a context manager so it is removed
    # deterministically instead of relying on garbage collection of the
    # TemporaryDirectory object.
    with tempfile.TemporaryDirectory() as temp_dir:
        for pdf_file in pdf_files:
            # Keep only the final path component so an upload name containing
            # directory parts cannot point outside the temporary directory.
            file_name = os.path.basename(pdf_file.name)
            temp_pdf_file_path = os.path.join(temp_dir, file_name)

            with open(temp_pdf_file_path, "wb") as f:
                f.write(pdf_file.getvalue())

            # load() returns the parsed documents as a list, so the file on
            # disk is not needed once this call has returned.
            loader = PyPDFLoader(temp_pdf_file_path)
            docs.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1500, chunk_overlap=200
    )
    chunks = text_splitter.split_documents(docs)

    # embeddings
    # NOTE(review): created without arguments, so the API key presumably comes
    # from the OPENAI_API_KEY environment variable - confirm against caller.
    embeddings = OpenAIEmbeddings()
    vector_db = FAISS.from_documents(chunks, embeddings)

    retriever = vector_db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.5, "k": 5},
    )
    return retriever
def main():
    """Run the Streamlit "Talk to PDF" chat page.

    Flow of one script run:
      1. Configure the page and draw the header and sidebar.
      2. Ask for the OpenAI key; stop the run until one is entered.
      3. Ask for PDF uploads; stop the run until at least one is provided.
      4. Build the retriever, chat memory, LLM and retrieval chain.
      5. Re-draw the stored chat history, then answer a new prompt if any.
    """
    st.set_page_config(
        page_title="Talk to PDF using GPT 3.5",
        page_icon="📰",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.header("Talk to PDF files 📰", divider="rainbow")
    st.subheader(
        "Enjoy :red[talking] with :green[PDF] files using :sunglasses: OpenAI GPT 3.5 Turbo"
    )

    st.sidebar.title("Talk to PDF 📰")
    st.sidebar.markdown(
        "[Checkout the repository](https://github.com/ThivaV/chat_with_pdf_using_gpt)"
    )
    # The literal below is kept flush-left so no leading whitespace ends up
    # in the Markdown text.
    st.sidebar.markdown(
        """
### This is a LLM powered chatbot, built using:
* [Streamlit](https://streamlit.io)
* [LangChain](https://python.langchain.com/)
* [OpenAI](https://platform.openai.com/docs/models)
___
"""
    )

    add_vertical_space(2)

    openai_key = st.sidebar.text_input(label="Enter the OpenAI key 👇", type="password")
    if not openai_key:
        st.info("👈 :red[Please enter the OpenAI key] ⛔")
        st.stop()

    # set the OPENAI_API_KEY to environment
    # NOTE(review): os.environ is process-wide, so this value is visible to
    # everything running in this process - confirm that is acceptable when
    # several users share one server.
    os.environ["OPENAI_API_KEY"] = openai_key

    add_vertical_space(1)

    upload_pdf_files = st.sidebar.file_uploader(
        "Upload a pdf files 📤", type="pdf", accept_multiple_files=True
    )
    if not upload_pdf_files:
        st.info("👈 :red[Please upload pdf files] ⛔")
        st.stop()

    retriever = load_retriever(upload_pdf_files)

    chat_history = StreamlitChatMessageHistory()

    # init chat history memory
    memory = ConversationBufferMemory(
        memory_key="chat_history", chat_memory=chat_history, return_messages=True
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_key,
        temperature=0,
        streaming=True,
    )

    chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=False
    )

    # load previous chat history
    # re-draw the chat history in the chat window
    for message in chat_history.messages:
        st.chat_message(message.type).write(message.content)

    if prompt := st.chat_input("Ask questions"):
        with st.chat_message("human"):
            st.markdown(prompt)

        # NOTE(review): Chain.run appears to be deprecated in recent LangChain
        # releases in favour of invoke() - verify before upgrading.
        response = chain.run(prompt)

        with st.chat_message("ai"):
            st.write(response)
if __name__ == "__main__":
    # Build the page only when this file is executed as a script,
    # not when it is imported as a module.
    main()