Spaces:

afkarfcom
/

Chat-Pdf

Running

File size: 4,583 Bytes

3f06cdf
36371ad
3f06cdf
62a93dd
3f06cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906f814
 
3f06cdf
 
 
906f814
 
3f06cdf
 
 
 
 
 
 
 
 
 
4367a52
 
 
 
 
 
 
 
 
 
 
 
 
3f06cdf
 
6a4fd86
3f06cdf
 
62a93dd
 
906f814
 
8993fa2
 
9bb9fa9
8993fa2
906f814
 
 
 
 
76ae2e1
906f814
 
8993fa2
3f06cdf
 
 
6a4fd86
3f06cdf
0c89368
c1166ca
25c9720
ca51d17
 
 
 
 
0c89368
ca51d17
6da1aa9
fc91bdd
 
e737bbc
0ccc0b8
 
89e0c2c
 
 
 
c1166ca
89e0c2c
 
c1166ca
89e0c2c
906f814
c1166ca
89e0c2c
906f814
89e0c2c
3f06cdf
 
9bb9fa9

import streamlit as st
from pypdf import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``
            (in this app, the files returned by ``st.file_uploader``).

    Returns:
        One string holding the page texts in document/page order, with no
        separator inserted between pages. Empty when ``pdf_docs`` is empty
        or no page yields any text.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            # ``or ""`` tolerates a page with no extractable text (e.g. a
            # scanned image) should the extractor hand back None.
            page_texts.append(page.extract_text() or "")
    # Join once at the end instead of growing a string with ``+=`` per page.
    return "".join(page_texts)

def get_text_chunks(text, *, chunk_size=1000, chunk_overlap=200):
    """Split raw text into overlapping chunks on newline boundaries.

    Args:
        text: The text to split.
        chunk_size: Target maximum chunk length, measured with ``len``
            (characters). Defaults to 1000.
        chunk_overlap: Number of characters shared between consecutive
            chunks. Defaults to 200.

    Returns:
        A list of text chunks; empty when ``text`` is empty.
    """
    # Nothing to split (e.g. image-only PDFs produced no text): skip the
    # splitter entirely and hand back an empty list.
    if not text:
        return []

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks, openai_api_key, embedding_model):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    Args:
        text_chunks: Non-empty list of text chunks to embed.
        openai_api_key: API key passed to ``OpenAIEmbeddings``.
        embedding_model: Name of the OpenAI embedding model to use.

    Returns:
        The FAISS vector store built from ``text_chunks``.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Fail early with a clear message: with no texts there is nothing to
    # embed, and index construction would otherwise fail further down with
    # an error that does not point at the real cause.
    if not text_chunks:
        raise ValueError(
            "Cannot build a vector store: no text chunks were provided."
        )

    embeddings = OpenAIEmbeddings(api_key=openai_api_key, model=embedding_model)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)

def get_conversation_chain(vectorstore, openai_api_key, chat_model):
    """Build a conversational retrieval chain over the given vector store.

    Args:
        vectorstore: Store whose ``as_retriever()`` supplies the retriever.
        openai_api_key: API key passed to ``ChatOpenAI``.
        chat_model: Name of the OpenAI chat model to use.

    Returns:
        A ``ConversationalRetrievalChain`` wired to the chat model, the
        store's retriever, and a buffer memory stored under the
        ``chat_history`` key.
    """
    chat_llm = ChatOpenAI(api_key=openai_api_key, model=chat_model)
    history_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    doc_retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=doc_retriever,
        memory=history_memory,
    )

def handle_userinput(user_question):
    """Send a question to the conversation chain and render the chat log.

    Appends the question and the chain's answer to
    ``st.session_state.chat_history``, then writes every message in the
    history using the user/bot HTML templates.

    Args:
        user_question: The question text entered by the user.
    """
    # Function-scope import keeps this block self-contained.
    import html

    history = st.session_state.chat_history

    # Record the user's question in the chat history.
    history.append({"role": "user", "content": user_question})

    # Ask the conversation chain and record its answer.
    response = st.session_state.conversation({'question': user_question})
    history.append({"role": "bot", "content": response['answer']})

    # Render every message in the chat history.
    for message in history:
        template = user_template if message['role'] == 'user' else bot_template
        # The templates are rendered with unsafe_allow_html=True, so message
        # text (typed by the user, or generated from uploaded documents) must
        # be escaped or it could inject markup into the page.
        safe_content = html.escape(message['content'])
        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)

def main():
    """Render the Streamlit UI for chatting with uploaded PDFs.

    The sidebar collects the OpenAI API key, the embedding and chat model
    choices, and the PDF uploads; "Process PDFs" builds the vector store and
    the conversation chain and stores the chain in ``st.session_state``.
    The main area holds the question form and the rendered chat history.
    """
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")

    # Embedding model choices.
    embedding_model_options = [
        "text-embedding-3-large",
        "text-embedding-3-small",
        "text-embedding-ada-002",
    ]
    selected_embedding_model = st.sidebar.selectbox("Select the Embedding Model", embedding_model_options)

    # Chat model choices.
    chat_model_options = [
        "gpt-4o-mini",
        "gpt-3.5-turbo-0125",
    ]
    selected_chat_model = st.sidebar.selectbox("Select the Chat Model", chat_model_options)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    st.header("Chat with multiple PDFs :books:")
    st.write("Please enter the data in the menu on the left")

    # The question box lives in a form with clear_on_submit=True so the input
    # is actually emptied after sending; assigning to an unrelated
    # session_state key does not reset a widget that has no key.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_area("Ask a question about your documents:", height=100)
        send_clicked = st.form_submit_button("Send")

    if send_clicked and user_question:
        if st.session_state.conversation:
            handle_userinput(user_question)
        else:
            # Tell the user why nothing happened instead of ignoring the click.
            st.warning("Please upload and process your PDFs before asking a question.")

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here",
            type=["pdf"],
            accept_multiple_files=True,
        )

        # The button is only shown once both the files and the API key exist.
        if pdf_docs and openai_api_key and st.button("Process PDFs"):
            with st.spinner("Processing"):
                # Extract the raw text from the PDFs.
                raw_text = get_pdf_text(pdf_docs)

                # Split the text into chunks.
                text_chunks = get_text_chunks(raw_text)

                if not text_chunks:
                    # Image-only or empty PDFs yield nothing to embed; report
                    # it rather than failing while building the vector store.
                    st.error("No extractable text was found in the uploaded PDFs.")
                else:
                    # Create the vector store.
                    vectorstore = get_vectorstore(text_chunks, openai_api_key, selected_embedding_model)

                    # Create the conversation chain.
                    st.session_state.conversation = get_conversation_chain(vectorstore, openai_api_key, selected_chat_model)
                    st.success("PDFs processed successfully!")

# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()