File size: 4,425 Bytes
1404580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb90a67
1404580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad29e3b
1404580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb90a67
 
1404580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import streamlit as st
import os
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma, Pinecone
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader
import pinecone

# Set the path where you want to save the uploaded PDF file
SAVE_DIR = "pdf"


# Page header and intro copy; rendered above the sidebar/column layout below.
st.header('Question Answering with your PDF file')
st.write("Are you interested in chatting with your own documents, whether it is a text file, a PDF, or a website? LangChain makes it easy for you to do question answering with your documents.")
def qa(file, query, chain_type, k,api_key_pinecode,index_name,environment_pinecode):
    """Answer `query` against the PDF at `file` via a Pinecone-backed RetrievalQA chain.

    Args:
        file: Path to a local PDF file.
        query: Natural-language question to answer.
        chain_type: RetrievalQA chain type ('stuff', 'map_reduce', 'refine', 'map_rerank').
        k: Number of relevant chunks the retriever returns.
        api_key_pinecode: Pinecone API key (param name kept misspelled for caller compatibility).
        index_name: Name of an existing Pinecone index to write the embeddings into.
        environment_pinecode: Pinecone environment string (e.g. "northamerica-northeast1-gcp").

    Returns:
        The RetrievalQA result dict, containing 'result' (the answer text)
        and 'source_documents' (the retrieved chunks).
    """
    # Load the PDF and split it into ~1000-character chunks with no overlap.
    loader = PyPDFLoader(file)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks with OpenAI and upsert them into the Pinecone index.
    embeddings = OpenAIEmbeddings()
    pinecone.init(
        api_key=api_key_pinecode,          # find at app.pinecone.io
        environment=environment_pinecode,  # shown next to the API key in the console
    )
    # Pinecone.from_texts takes raw strings, so pass each chunk's page_content.
    db = Pinecone.from_texts(
        [t.page_content for t in texts], embeddings, index_name=index_name
    )

    # Expose the index as a similarity retriever and run the QA chain.
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    qa_chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=True,
    )
    result = qa_chain({"query": query})
    return result









with st.sidebar:
    # Collect the credentials and index settings that qa() needs.
    st.header('Configurations')
    st.write("Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account).")
    apikey = st.text_input("Enter your OpenAI API Key here")
    # Only export the key once the user has entered one; the unconditional
    # assignment set OPENAI_API_KEY to "" on every rerun before input.
    if apikey:
        os.environ["OPENAI_API_KEY"] = apikey

    # User-facing "Pinecode" typos corrected to "Pinecone".
    st.write("Enter Pinecone API key.  [Pinecone](https://www.pinecone.io/).")

    apikey2 = st.text_input("Enter your Pinecone Key here")

    # Variable name kept misspelled because the main column reads it as-is.
    enviroment_pinecode = st.text_input("Enter your Pinecone environment Key")

    index_name = st.text_input("enter index-name")

# Two-column layout: uploader/inputs on the left, answer output on the right.
left_column, right_column = st.columns(2)
    # You can use a column just like st.sidebar:








# Track the saved upload path; stays None until the user provides a PDF.
file_path = None

with left_column:

    # Add a file uploader to the app
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    # Check if a file has been uploaded
    if uploaded_file is not None:
        # Persist the upload to disk so PyPDFLoader can read it by path.
        # The original crashed with FileNotFoundError if SAVE_DIR was missing.
        os.makedirs(SAVE_DIR, exist_ok=True)
        file_path = os.path.join(SAVE_DIR, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success(f"File path {file_path}")

    query = st.text_input("enter your question")
    chain_type = st.selectbox(
        'chain type',
        ('stuff', 'map_reduce', "refine", "map_rerank"))
    k = st.slider('Number of relevant chunks', 1, 5)

    if st.button('Loading'):
        # The original raised NameError when the button was pressed before
        # any file had been uploaded; guard and warn instead.
        if file_path is None:
            st.warning("Please upload a PDF file first.")
        else:
            result = qa(file_path, query, chain_type, k, apikey2, index_name, enviroment_pinecode)

            with right_column:

                st.write("Output of your question")

                st.subheader("Result")
                st.write(result['result'])

                st.subheader("source_documents")
                # Guard against an empty retrieval (original raised IndexError).
                if result['source_documents']:
                    st.write(result['source_documents'][0])