File size: 6,179 Bytes
781a2e4
 
 
 
 
a43a4a7
 
 
781a2e4
 
a43a4a7
781a2e4
 
 
 
a43a4a7
781a2e4
 
 
 
 
 
 
 
 
 
 
 
 
 
a43a4a7
781a2e4
 
 
 
 
a43a4a7
 
781a2e4
 
 
 
 
a43a4a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781a2e4
a43a4a7
 
781a2e4
 
a43a4a7
 
781a2e4
 
a43a4a7
781a2e4
 
a43a4a7
781a2e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a43a4a7
781a2e4
 
a43a4a7
781a2e4
a43a4a7
781a2e4
 
 
 
 
 
a43a4a7
781a2e4
 
 
 
 
 
 
 
 
a43a4a7
 
781a2e4
 
 
 
 
 
 
 
 
 
 
 
 
a43a4a7
781a2e4
 
 
 
 
a43a4a7
 
 
 
 
 
 
 
 
 
 
781a2e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Reference https://huggingface.co/spaces/johnmuchiri/anspro1/blob/main/app.py
# Resource https://python.langchain.com/docs/modules/chains

import streamlit as st
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
import openai
from dotenv import load_dotenv
import os

import pinecone

load_dotenv()

# please create a streamlit app on huggingface that uses openai api
# and langchain data framework, the user should be able to upload
# a document and ask questions about the document, the app should
# respond with an answer and also display where the response is
# referenced from using some sort of visual annotation on the document

# set the path where you want to save the uploaded PDF file
SAVE_DIR = "pdf"


def generate_response(pages, query_text, k, chain_type):
    """Answer ``query_text`` against the uploaded document pages.

    Embeds ``pages`` into the ``document-chat`` Pinecone index, retrieves
    the ``k`` most similar chunks, and runs a
    ``RetrievalQAWithSourcesChain`` over them with gpt-3.5-turbo.

    Args:
        pages: Document chunks produced by the PDF loader/splitter.
        query_text: The user's question.
        k: Number of relevant chunks the retriever should return.
        chain_type: LangChain combine-documents strategy
            ("stuff", "map_reduce", "refine", or "map_rerank").

    Returns:
        The chain's response dict (includes "answer" and
        "source_documents"), or ``None`` when ``pages`` is empty.
    """
    if not pages:
        return None

    # Credentials are exported to the environment by the sidebar form.
    pinecone.init(
        api_key=os.getenv("PINECONE_API_KEY"),
        environment=os.getenv("PINECONE_ENV_NAME"),
    )

    vector_db = Pinecone.from_documents(
        documents=pages, embedding=OpenAIEmbeddings(), index_name="document-chat"
    )

    # BUG FIX: the keyword was misspelled "search_kwards", so the
    # user's chosen k was silently ignored and the retriever fell back
    # to its default.
    retriever = vector_db.as_retriever(
        search_type="similarity", search_kwargs={"k": k}
    )

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

    # Chain that answers the question and also reports which source
    # documents the answer came from.
    qa = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
    )

    return qa({"question": query_text})


def visual_annotate(document, answer):
    """Return ``document`` with the first occurrence of ``answer``
    wrapped in ``**...**`` (markdown bold) as a visual annotation.

    Args:
        document: Full document text to annotate.
        answer: Substring to highlight.

    Returns:
        The annotated document, or ``document`` unchanged when
        ``answer`` is empty or not present verbatim.

    BUG FIX: the original used ``str.find``'s -1 "not found" sentinel
    directly as a slice index, producing a corrupted document
    (``document[:-1] + "**" + ...``) whenever the answer did not occur
    verbatim in the text.
    """
    if not answer:
        return document
    start = document.find(answer)
    if start == -1:
        # Nothing to highlight; leave the document untouched.
        return document
    end = start + len(answer)
    return document[:start] + "**" + answer + "**" + document[end:]


# --- Streamlit page layout -------------------------------------------------
# BUG FIX: the title emoji was mojibake ("πŸ¦œπŸ”—" is the UTF-8 bytes of
# the parrot/link emoji decoded as Latin-1); restored the intended glyphs.
st.set_page_config(page_title="🦜🔗 Ask the Doc App")
st.title("Document Question Answering App")

# Sidebar form: collect credentials and export them via the environment
# so generate_response() can read them with os.getenv().
with st.sidebar.form(key="sidebar-form"):
    st.header("Configurations")

    openai_api_key = st.text_input("Enter OpenAI API key here", type="password")
    os.environ["OPENAI_API_KEY"] = openai_api_key

    pinecone_api_key = st.text_input(
        "Enter your Pinecone environment key", type="password"
    )
    os.environ["PINECONE_API_KEY"] = pinecone_api_key

    pinecone_env_name = st.text_input("Enter your Pinecone environment name")
    os.environ["PINECONE_ENV_NAME"] = pinecone_env_name

    submitted = st.form_submit_button(
        label="Submit",
        # disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
    )

left_column, right_column = st.columns(2)

with left_column:
    uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
    pages = []

    if uploaded_file is not None:
        # BUG FIX: SAVE_DIR may not exist on a fresh deployment, in which
        # case open() below raised FileNotFoundError.
        os.makedirs(SAVE_DIR, exist_ok=True)

        # Persist the upload so PyPDFLoader can read it from disk.
        file_path = os.path.join(SAVE_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File {uploaded_file.name} is saved at path {file_path}")

        loader = PyPDFLoader(file_path=file_path)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        pages = loader.load_and_split(text_splitter=text_splitter)

    query_text = st.text_input(
        "Enter your question:", placeholder="Please provide a short summary."
    )

    chain_type = st.selectbox(
        "chain type", ("stuff", "map_reduce", "refine", "map_rerank")
    )

    k = st.slider("Number of relevant chunks", 1, 5)

    with st.spinner("Retrieving and generating a response ..."):
        # ROBUSTNESS: only hit the embedding/LLM backends once the user
        # has actually typed a question; generate_response already
        # returns None when no pages are loaded.
        response = (
            generate_response(
                pages=pages, query_text=query_text, k=k, chain_type=chain_type
            )
            if query_text
            else None
        )

        with right_column:
            st.write("Output of your question")

            if response:
                st.subheader("Result")
                st.write(response["answer"])

                st.subheader("source_documents")
                for each in response["source_documents"]:
                    st.write("page: ", each.metadata["page"])
                    st.write("source: ", each.metadata["source"])
            else:
                st.write("response not showing at the moment")


# with st.form("myform", clear_on_submit=True):
#     openai_api_key = st.text_input(
#         "OpenAI API Key", type="password", disabled=not (uploaded_file and query_text)
#     )
#     submitted = st.form_submit_button(
#         "Submit", disabled=not (pages and query_text)
#     )
#     if submitted and openai_api_key.startswith("sk-"):
#         with st.spinner("Calculating..."):
#             response = generate_response(pages, openai_api_key, query_text)
#             result.append(response)
#             del openai_api_key

# if len(result):
#     st.info(response)

# if st.button("Get Answer"):
#     answer = get_answer(question, document)
#     st.write(answer["answer"])

#     # Visual annotation on the document
#     annotated_document = visual_annotate(document, answer["answer"])
#     st.markdown(annotated_document)