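"""Streamlit app: chat with and summarise uploaded PDF files.

PDFs are read with PyPDF2, split into chunks, embedded with Google Palm
embeddings, indexed in an in-memory FAISS store, and queried through a
LangChain ConversationalRetrievalChain with buffer memory.

This docstring is added documentation; the entry-point file name below is an
assumption, not stated in the source:

    streamlit run app.py
"""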
import streamlit as st
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Read the Google API key from the environment rather than hard-coding the secret in source.
if not os.getenv('GOOGLE_API_KEY'):
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running the app.")


def get_pdf_text(pdf_docs):
    """Concatenate the text of every page in every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable text.
            text += page.extract_text() or ""
    return text


def get_text_chunks(text):
    """Split the raw text into overlapping chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    return text_splitter.split_text(text)


def get_vector_store(text_chunks):
    """Embed the chunks with Google Palm and index them in an in-memory FAISS store."""
    embeddings = GooglePalmEmbeddings()
    return FAISS.from_texts(text_chunks, embedding=embeddings)


def get_conversational_chain(vector_store):
    """Build a retrieval chain that keeps the chat history in buffer memory."""
    llm = GooglePalm()
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=vector_store.as_retriever(), memory=memory
    )


def user_input(user_question):
    """Answer a question against the processed PDFs and show the running chat history."""
    if st.session_state.conversation is None:
        st.warning("Please upload and process your PDF files before asking questions.")
        return
    with st.container():
        response = st.session_state.conversation({'question': user_question})
        st.session_state.chatHistory = response['chat_history']
        file_contents = ""
        left, right = st.columns((2, 1))
        with left:
            # Messages alternate between the user's question and the bot's answer.
            for i, message in enumerate(st.session_state.chatHistory):
                if i % 2 == 0:
                    st.write("Human:", message.content)
                else:
                    st.write("Bot:", message.content)
            st.success("Done!")
        with right:
            for message in st.session_state.chatHistory:
                file_contents += f"{message.content}\n"
            st.download_button("Download chat history", file_contents,
                               file_name="Chat_History.txt", mime="text/plain")


def summary(summarization):
    """Generate a topic-wise summary of the processed PDFs when the button is pressed."""
    if not summarization:
        return
    if st.session_state.conversation is None:
        st.warning("Please upload and process your PDF files before requesting a summary.")
        return
    with st.container():
        left, right = st.columns((2, 1))
        with left:
            response = st.session_state.conversation({
                'question': 'Retrieve one-line topics and their descriptors; '
                            'create detailed, bulleted summaries for each topic.'
            })
            st.write("Summary:\n", response['answer'])
            st.success("Done!")
        with right:
            st.download_button("Download Summary", response['answer'],
                               file_name="summarization_result.txt", mime="text/plain")


def main():
    st.set_page_config("LOR ChatAI")
    st.header("LOR ChatAI")
    st.write("---")

    # Initialise session state up front so every section can safely read it.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chatHistory" not in st.session_state:
        st.session_state.chatHistory = None

    with st.container():
        with st.sidebar:
            st.title("Settings")
            st.subheader("Upload your Documents")
            pdf_docs = st.file_uploader("Upload your PDF files and click the Process button",
                                        accept_multiple_files=True)
            if st.button("Process"):
                if not pdf_docs:
                    st.warning("Please upload at least one PDF file first.")
                else:
                    with st.spinner("Processing"):
                        raw_text = get_pdf_text(pdf_docs)
                        text_chunks = get_text_chunks(raw_text)
                        vector_store = get_vector_store(text_chunks)
                        st.session_state.conversation = get_conversational_chain(vector_store)
                        st.success("Done")
    with st.container():
        # Summarisation section
        st.subheader("PDF Summarisation")
        st.write("Click the Summarise button to get a summary of the uploaded files.")
        summarization = st.button("Summarise")
        summary(summarization)
        
    st.write("---")

    with st.container():
        # Question-answer section
        st.subheader("PDF question-answer section")
        user_question = st.text_input("Ask a question about the PDF files")
        if user_question:
            user_input(user_question)
    st.write('##')

if __name__ == "__main__":
    main()
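
# Assumed dependency set for this script (not pinned or confirmed by the source file):
#   streamlit, PyPDF2, langchain, faiss-cpu, google-generativeai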