# File size: 4,583 Bytes
# NOTE: the commit-hash and line-number columns that followed this header were web file-viewer residue, not program text.
import streamlit as st
from pypdf import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.

    Returns:
        One string holding all page texts in document order, then page
        order. The empty string when ``pdf_docs`` is empty.
    """
    # Join once instead of growing a string with ``+=`` inside nested loops.
    # ``or ""`` is defensive: if extraction yields None (or nothing) for a
    # page, that page contributes no text instead of raising TypeError.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )
def get_text_chunks(text):
    """Split ``text`` into chunks using a ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, ``chunk_size=1000``,
    ``chunk_overlap=200`` and ``len`` as the length function.

    Args:
        text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
def get_vectorstore(text_chunks, openai_api_key, embedding_model):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    Args:
        text_chunks: Non-empty sequence of text chunks to embed.
        openai_api_key: API key handed to ``OpenAIEmbeddings``.
        embedding_model: Name of the embedding model to use.

    Returns:
        The vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Fail early with a clear message: there is nothing to index, and
    # building the embeddings client (and calling the API) would be wasted.
    # NOTE(review): an empty list appears to fail inside the FAISS index
    # construction with a much less helpful error - confirm if relying on it.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the documents contained no extractable text."
        )

    embeddings = OpenAIEmbeddings(api_key=openai_api_key, model=embedding_model)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
def get_conversation_chain(vectorstore, openai_api_key, chat_model):
    """Build a conversational retrieval chain on top of ``vectorstore``.

    Args:
        vectorstore: Store whose ``as_retriever()`` supplies the retriever.
        openai_api_key: API key handed to ``ChatOpenAI``.
        chat_model: Name of the chat model to use.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``, wired
        to a ``ConversationBufferMemory`` stored under ``'chat_history'``.
    """
    chat_llm = ChatOpenAI(api_key=openai_api_key, model=chat_model)
    buffer_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    doc_retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=doc_retriever,
        memory=buffer_memory,
    )
def handle_userinput(user_question):
    """Ask the conversation chain a question and render the chat history.

    Calls ``st.session_state.conversation`` with the question, appends the
    user turn and the bot answer to ``st.session_state.chat_history``, then
    writes every stored message through the user/bot HTML templates.

    Args:
        user_question: The question text entered by the user.
    """
    import html  # function-scope import: only this function needs it

    # Get the AI response first. Recording the turns only after the call has
    # succeeded avoids leaving an unanswered question in the history when
    # the chain raises.
    response = st.session_state.conversation({'question': user_question})

    history = st.session_state.chat_history
    history.append({"role": "user", "content": user_question})
    history.append({"role": "bot", "content": response['answer']})

    # Render all messages in the chat history.
    for message in history:
        template = user_template if message['role'] == 'user' else bot_template
        # The templates are rendered with unsafe_allow_html=True, so escape
        # the message text to keep user- or model-supplied markup from being
        # interpreted as HTML.
        safe_content = html.escape(message['content'])
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)
def main():
    """Run the Streamlit "Chat with multiple PDFs" page.

    The sidebar collects the OpenAI API key, the embedding and chat model
    choices, and the uploaded PDFs. "Process PDFs" builds the vector store and
    the conversation chain, which is kept in ``st.session_state``. The main
    area holds a question form whose submission is passed to
    ``handle_userinput``.
    """
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")

    # Model choices for embeddings.
    embedding_model_options = [
        "text-embedding-3-large",
        "text-embedding-3-small",
        "text-embedding-ada-002",
    ]
    selected_embedding_model = st.sidebar.selectbox(
        "Select the Embedding Model", embedding_model_options
    )

    # Model choices for chat.
    chat_model_options = [
        "gpt-4o-mini",
        "gpt-3.5-turbo-0125",
    ]
    selected_chat_model = st.sidebar.selectbox("Select the Chat Model", chat_model_options)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    st.header("Chat with multiple PDFs :books:")
    st.write("Please enter the data in the menu on the left")

    # A form with clear_on_submit=True empties the text area after sending.
    # Assigning to an unrelated session_state key (as the code did before)
    # has no effect on a widget that is not bound to that key.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_area("Ask a question about your documents:", height=100)
        send_clicked = st.form_submit_button("Send")

    if send_clicked:
        # Tell the user why nothing happens instead of silently ignoring the click.
        if not user_question:
            st.warning("Please enter a question first.")
        elif not st.session_state.conversation:
            st.warning("Please upload and process your PDFs before asking a question.")
        else:
            handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        # Restrict uploads to PDFs; every uploaded file is fed to the PDF reader.
        pdf_docs = st.file_uploader(
            "Upload your PDFs here", type="pdf", accept_multiple_files=True
        )
        if pdf_docs and openai_api_key:
            if st.button("Process PDFs"):
                with st.spinner("Processing"):
                    # Get the PDF text.
                    raw_text = get_pdf_text(pdf_docs)
                    # Get the text chunks.
                    text_chunks = get_text_chunks(raw_text)
                    if text_chunks:
                        # Create the vector store.
                        vectorstore = get_vectorstore(
                            text_chunks, openai_api_key, selected_embedding_model
                        )
                        # Create the conversation chain.
                        st.session_state.conversation = get_conversation_chain(
                            vectorstore, openai_api_key, selected_chat_model
                        )
                        # The new chain starts with fresh memory, so reset the
                        # displayed history to stay in sync with it.
                        st.session_state.chat_history = []
                if text_chunks:
                    st.success("PDFs processed successfully!")
                else:
                    # Nothing to index (e.g. no extractable text in the files).
                    st.error("No text could be extracted from the uploaded PDFs.")
# Script entry point: start the app only when this file is run directly.
if __name__ == '__main__':
    main()