File size: 5,011 Bytes
be63200
1dc9fa7
375bd04
9b3a016
1dc9fa7
79fbe78
 
a850fbe
9b3a016
 
 
444d231
7ef822f
 
 
 
 
9b3a016
444d231
 
9b3a016
 
 
bbbffce
a850fbe
be63200
 
 
 
 
 
 
 
5df5027
be63200
 
 
 
 
 
 
 
 
a850fbe
be63200
 
 
 
 
 
 
 
 
1334178
be63200
 
 
 
 
 
 
 
de20d93
1612952
 
259cbe8
9b3a016
 
 
 
9e53bcd
9b3a016
 
 
be63200
9b3a016
 
 
 
 
 
a850fbe
1612952
a850fbe
9b3a016
 
 
 
 
 
 
 
444d231
 
1334178
9b3a016
 
444d231
be63200
9b3a016
 
 
 
 
 
 
be63200
9b3a016
 
 
 
be63200
 
a850fbe
 
ffc1b97
1612952
 
 
 
 
 
 
 
ffc1b97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a850fbe
 
be63200
a850fbe
be63200
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import streamlit as st

from chat_profile import ChatProfileRoleEnum
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory

# Swap the standard-library sqlite3 module for pysqlite3: after this runs,
# every later `import sqlite3` in the process resolves to the pysqlite3 build.
# NOTE(review): presumably needed because the Chroma vector store wants a newer
# SQLite than the host provides - confirm before removing.
__import__("pysqlite3")
import sys

sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

# Page configuration; kept ahead of every other Streamlit call in the script.
# The icon is a real emoji character (the previous literal was mis-decoded
# UTF-8 and not a valid icon).
st.set_page_config(page_title="InkChatGPT", page_icon="📚")

# Chat history shared by the sidebar and the main chat view, stored under the
# "langchain_messages" key.
msgs = StreamlitChatMessageHistory(key="langchain_messages")


def load_and_process_file(file_data):
    """
    Load an uploaded document and embed its chunks into a Chroma vector store.

    Args:
        file_data: Uploaded file object exposing ``name`` and ``getvalue()``
            (as returned by ``st.file_uploader``).

    Returns:
        A ``Chroma`` vector store containing the embedded chunks, or ``None``
        when the format is unsupported or no text chunk could be produced
        (an error is shown in the UI in both cases).
    """
    import tempfile

    loader_by_extension = {
        ".pdf": PyPDFLoader,
        ".docx": Docx2txtLoader,
        ".txt": TextLoader,
    }

    # Only the base name of the client-supplied file name is used, and the
    # extension is matched case-insensitively so "REPORT.PDF" is accepted.
    display_name = os.path.basename(file_data.name)
    _, extension = os.path.splitext(display_name)
    extension = extension.lower()

    # Reject unsupported formats before anything is written to disk.
    loader_cls = loader_by_extension.get(extension)
    if loader_cls is None:
        st.error("This document format is not supported!")
        return None

    # The loaders read from a path, so spill the upload into a private
    # temporary directory that is removed as soon as loading is done, instead
    # of leaving a copy in the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_path = os.path.join(tmp_dir, "upload" + extension)
        with open(file_path, "wb") as f:
            f.write(file_data.getvalue())
        documents = loader_cls(file_path).load()

    # Point the metadata at the uploaded file name rather than the (already
    # deleted) temporary path.
    for document in documents:
        document.metadata["source"] = display_name

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Nothing to embed (e.g. an empty file); fail the same way as an
        # unsupported format so the caller's None check covers it.
        st.error("No text could be extracted from this document!")
        return None

    embeddings = OpenAIEmbeddings(api_key=st.session_state.api_key)
    return Chroma.from_documents(chunks, embeddings)


def main():
    """
    Render the chat view and answer a newly submitted question.

    Shows a hint when no OpenAI API key is set, seeds the history with a
    greeting on first run, replays the stored messages, and, when the user
    submits a question, runs it through the prompt/LLM chain and writes the
    reply. The chat input is disabled while no API key is available.
    """
    # .get() so a missing key (sidebar not built yet) behaves like an empty one
    # instead of raising.
    api_key = st.session_state.get("api_key")
    if not api_key:
        st.info("Please add your OpenAI API key to continue.")

    if not msgs.messages:
        # Built from plain strings: an indented triple-quoted literal would be
        # rendered by Markdown as a code block.
        msgs.add_ai_message(
            "Hello, how can I help you?\n\n"
            "You can upload a document and chat with me to ask questions "
            "related to its content."
        )

    # Render current messages from StreamlitChatMessageHistory
    for msg in msgs.messages:
        st.chat_message(msg.type).write(msg.content)

    # If user inputs a new prompt, generate and draw a new response
    if question := st.chat_input(
        placeholder="Chat with your document",
        disabled=(not api_key),
    ):
        st.chat_message(ChatProfileRoleEnum.Human).write(question)
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", "You are an AI chatbot having a conversation with a human."),
                MessagesPlaceholder(variable_name="history"),
                # A template variable filled at invoke time. Interpolating the
                # raw question into the template would make any "{" or "}" the
                # user typed be parsed as a placeholder.
                (ChatProfileRoleEnum.Human, "{question}"),
            ]
        )

        llm = ChatOpenAI(
            api_key=api_key,
            temperature=0.0,
            model_name="gpt-3.5-turbo",
        )

        chain = prompt | llm
        chain_with_history = RunnableWithMessageHistory(
            chain,
            lambda session_id: msgs,  # single shared history, whatever the id
            input_messages_key="question",
            history_messages_key="history",
        )

        # Note: new messages are saved to history automatically by Langchain during run
        config = {"configurable": {"session_id": "any"}}
        response = chain_with_history.invoke({"question": question}, config)
        st.chat_message(ChatProfileRoleEnum.AI).write(response.content)


def build_sidebar():
    """
    Build the sidebar: API key input plus the document upload form.

    Stores the entered key in ``st.session_state.api_key``. When the form is
    submitted with a file and a key starting with ``sk-``, the file is
    processed and a confirmation message is appended to the chat history;
    with any other key a warning is shown instead.
    """
    with st.sidebar:
        st.subheader("📚 InkChatGPT")

        openai_api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            placeholder="Enter your OpenAI API key",
        )
        st.session_state.api_key = openai_api_key

        with st.form("my_form"):

            uploaded_file = st.file_uploader(
                "Select a file", type=["pdf", "docx", "txt"], key="file_uploader"
            )

            # NOTE(review): the button is disabled only when BOTH the file and
            # the key are missing. Widgets inside a form report their value on
            # submit, so tightening this to `or` could leave the button
            # permanently disabled - verify before changing.
            add_file = st.form_submit_button(
                "Process File",
                disabled=(not uploaded_file and not openai_api_key),
            )
            if add_file and uploaded_file:
                if not openai_api_key.startswith("sk-"):
                    # Give feedback instead of silently ignoring the click.
                    st.warning(
                        "Please enter a valid OpenAI API key (it should start with `sk-`)."
                    )
                else:
                    with st.spinner("💭 Thinking..."):
                        # NOTE(review): the returned vector store is not kept
                        # anywhere, so nothing in this file can query it later.
                        vector_store = load_and_process_file(uploaded_file)

                        if vector_store is not None:
                            # Plain strings: an indented triple-quoted literal
                            # would be rendered by Markdown as a code block.
                            msgs.add_ai_message(
                                f"File: `{uploaded_file.name}`, processed successfully!\n\n"
                                "Feel free to ask me any question."
                            )


# Script entry point. The sidebar is built first because it stores the API key
# in st.session_state.api_key, which main() then reads.
if __name__ == "__main__":
    build_sidebar()
    main()