File size: 3,949 Bytes
0b1eb85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index import download_loader
from pandasai.llm.openai import OpenAI
from matplotlib import pyplot as plt
import streamlit as st
import pandas as pd
import os


# Folder on the server where uploaded documents are written before indexing.
documents_folder = "./documents"

# Fetch the PandasAIReader loader class, a wrapper over the PandasAI library.
PandasAIReader = download_loader("PandasAIReader")

# Page title and introductory header shown above the tabs.
st.title("Welcome to `ChatwithDocs`")
st.header(
    "Interact with Documents such as `PDFs/CSV/Docs` using the power of LLMs\n"
    "Powered by `LlamaIndex🦙` \n"
    "Checkout the [GITHUB Repo Here](https://github.com/anoopshrma/Chat-with-Docs) and Leave a star⭐"
)


def get_csv_result(df, query):
    """Run `query` against `df` through the PandasAI loader.

    Builds a `PandasAIReader` around the module-level `csv_llm` and returns
    whatever `run_pandas_ai` produces, with the conversational answer mode
    switched off.
    """
    pandas_reader = PandasAIReader(llm=csv_llm)
    return pandas_reader.run_pandas_ai(df, query, is_conversational_answer=False)

def save_file(doc):
    """Write an uploaded document into `documents_folder` and track its name.

    Args:
        doc: Uploaded file object exposing `.name` and `.read()`
            (presumably a Streamlit UploadedFile; confirm against caller).

    Returns:
        The base file name under which the document was stored.

    Side effects:
        Creates `documents_folder` if it is missing, clears
        `st.cache_resource` when the file name differs from the one stored
        in the session, and records the new name in
        `st.session_state['file_name']`.
    """
    fn = os.path.basename(doc.name)

    # The folder may be absent on a fresh deployment; without it open() fails.
    os.makedirs(documents_folder, exist_ok=True)

    # The context manager closes (and flushes) the handle deterministically,
    # so the file is complete on disk before anything reads it back.
    with open(documents_folder+'/'+fn, 'wb') as target:
        target.write(doc.read())

    # A different file name means the cached vectors belong to another
    # document, so drop them before remembering the new name.
    previous_name = st.session_state.get('file_name')
    if previous_name and previous_name != fn:
        st.cache_resource.clear()
    st.session_state['file_name'] = fn

    return fn

def remove_file(file_path):
    """Delete `file_path` from `documents_folder` if it is a regular file.

    Intended to be called once the vectors have been created; does nothing
    when no such file exists.
    """
    target = documents_folder+'/'+file_path
    if not os.path.isfile(target):
        return
    os.remove(target)

    

@st.cache_resource
def create_index():
    """Build a vector index from every file currently in `documents_folder`.

    The result is cached through `st.cache_resource`, so the index is only
    rebuilt after that cache has been cleared.
    NOTE: vectors for multiple files can be created in one call.
    """
    return GPTVectorStoreIndex.from_documents(
        SimpleDirectoryReader(documents_folder).load_data()
    )



def query_doc(vector_index, query):
    """Answer `query` using a query engine obtained from `vector_index`.

    The engine looks up the nearest matches for the query and hands them,
    together with the query, to the LLM for a rich response.
    """
    return vector_index.as_query_engine().query(query)


# Ask for the OpenAI key; it is exported for the libraries that read the
# OPENAI_API_KEY environment variable and also used to build the CSV LLM.
api_key = st.text_input("Enter your OpenAI API key here:", type="password")

# Always bind csv_llm so later references cannot raise NameError when the
# key field is still empty.
csv_llm = None
if api_key:
    os.environ['OPENAI_API_KEY'] = api_key
    csv_llm = OpenAI(api_token=api_key)
else:
    st.info("Enter your OpenAI API key to start chatting with your files.")


tab1, tab2 = st.tabs(["CSV", "PDFs/Docs"])

# --- CSV tab: upload a CSV and query it through PandasAI -------------------
with tab1:
    st.write("Chat with CSV files using PandasAI loader with LlamaIndex")
    input_csv = st.file_uploader("Upload your CSV file", type=['csv'])

    # Stays None until a CSV is uploaded, so the handler below can tell the
    # user what is missing instead of failing on an unbound name.
    df = None
    if input_csv is not None:
        st.info("CSV Uploaded Successfully")
        df = pd.read_csv(input_csv)
        st.dataframe(df, use_container_width=True)

    st.divider()

    input_text = st.text_area("Ask your query")

    if st.button("Send"):
        if not api_key:
            st.error("Please enter your OpenAI API key first.")
        elif df is None:
            st.warning("Please upload a CSV file before sending a query.")
        elif not (input_text or "").strip():
            st.warning("Please enter a query.")
        else:
            st.info("Your query: "+ input_text)
            with st.spinner('Processing your query...'):
                response = get_csv_result(df, input_text)
            # A query that plots leaves an open matplotlib figure behind;
            # show it in place of the textual answer.
            if plt.get_fignums():
                st.pyplot(plt.gcf())
                # pyplot state is process-global and survives Streamlit
                # reruns; close the figures so a later text-only answer is
                # not mistaken for a plot.
                plt.close("all")
            else:
                st.success(response)
    
# --- Documents tab: upload a document, index it, and query the index -------
with tab2:
    st.write("Chat with PDFs/Docs")
    input_doc = st.file_uploader("Upload your Docs")

    # Stays None until a document has been indexed, so the handler below can
    # report the missing upload instead of failing on an unbound name.
    index = None
    if input_doc is not None:
        if api_key:
            st.info("Doc Uploaded Successfully")
            file_name = save_file(input_doc)
            try:
                index = create_index()
            finally:
                # Remove the uploaded copy even when indexing fails, so it
                # does not linger in the documents folder.
                remove_file(file_name)
        else:
            st.warning("Please enter your OpenAI API key to index the document.")

    st.divider()
    input_text = st.text_area("Ask your question")

    if st.button("Ask"):
        if index is None:
            st.warning("Please upload a document before asking a question.")
        elif not (input_text or "").strip():
            st.warning("Please enter a question.")
        else:
            st.info("Your query: \n" +input_text)
            with st.spinner("Processing your query.."):
                response = query_doc(index, input_text)

            st.success(response)

            st.divider()
            # Shows the source documents context which
            # has been used to prepare the response
            st.write("Source Documents")
            st.write(response.get_formatted_sources())