Files changed (1)
  1. app.py +148 -148
app.py CHANGED
@@ -1,26 +1,26 @@
-import google.generativeai as palm
-import streamlit as st
-import os
-
-# Set your API key
-palm.configure(api_key = os.environ['PALM_KEY'])
-
-# Select the PaLM 2 model
-model = 'models/text-bison-001'
-
-# Generate text
-if prompt := st.chat_input("Ask your query..."):
-    enprom = f"""Answer the below provided input in context to Bhagwad Geeta. Use the verses and chapters sentences as references to your answer with suggestions
-    coming from Bhagwad Geeta. Your answer to below input should only be in context to Bhagwad geeta only.\nInput= {prompt}"""
-    completion = palm.generate_text(model=model, prompt=enprom, temperature=0.5, max_output_tokens=800)
-
-    # response = palm.chat(messages=["Hello."])
-    # print(response.last) # 'Hello! What can I help you with?'
-    # response.reply("Can you tell me a joke?")
-
-    # Print the generated text
-    with st.chat_message("Assistant"):
-        st.write(completion.result)
-
-
-
+# import google.generativeai as palm
+# import streamlit as st
+# import os
+
+# # Set your API key
+# palm.configure(api_key = os.environ['PALM_KEY'])
+
+# # Select the PaLM 2 model
+# model = 'models/text-bison-001'
+
+# # Generate text
+# if prompt := st.chat_input("Ask your query..."):
+#     enprom = f"""Answer the below provided input in context to Bhagwad Geeta. Use the verses and chapters sentences as references to your answer with suggestions
+#     coming from Bhagwad Geeta. Your answer to below input should only be in context to Bhagwad geeta only.\nInput= {prompt}"""
+#     completion = palm.generate_text(model=model, prompt=enprom, temperature=0.5, max_output_tokens=800)
+
+#     # response = palm.chat(messages=["Hello."])
+#     # print(response.last) # 'Hello! What can I help you with?'
+#     # response.reply("Can you tell me a joke?")
+
+#     # Print the generated text
+#     with st.chat_message("Assistant"):
+#         st.write(completion.result)
+
+
+
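Note on the hunk above: the PaLM path is commented out rather than deleted. If it needs to be re-enabled later, it runs standalone as below — a minimal sketch assuming the legacy `google.generativeai` PaLM API and a `PALM_KEY` environment variable; the prompt string here is an illustrative placeholder.

```python
# Minimal sketch of the PaLM call this hunk disables (legacy google.generativeai API).
import os
import google.generativeai as palm

palm.configure(api_key=os.environ['PALM_KEY'])  # same env var the app reads

completion = palm.generate_text(
    model='models/text-bison-001',  # model the app selects
    prompt='Summarise Bhagavad Gita chapter 2 in two sentences.',  # placeholder prompt
    temperature=0.5,
    max_output_tokens=800,
)
print(completion.result)  # the generated string (None if the prompt was filtered)
```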
 
@@ -29,168 +29,168 @@ if prompt := st.chat_input("Ask your query..."):
-
-
-
-# import streamlit as st
-# from dotenv import load_dotenv
-# from PyPDF2 import PdfReader
-# from langchain.text_splitter import CharacterTextSplitter
-# from langchain.embeddings import HuggingFaceEmbeddings
-# from langchain.vectorstores import FAISS
-# # from langchain.chat_models import ChatOpenAI
-# from langchain.memory import ConversationBufferMemory
-# from langchain.chains import ConversationalRetrievalChain
-# from htmlTemplates import css, bot_template, user_template
-# from langchain.llms import HuggingFaceHub
-# import os
-# # from transformers import T5Tokenizer, T5ForConditionalGeneration
-# # from langchain.callbacks import get_openai_callback
-
-# hub_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
-
-# def get_pdf_text(pdf_docs):
-#     text = ""
-#     for pdf in pdf_docs:
-#         pdf_reader = PdfReader(pdf)
-#         for page in pdf_reader.pages:
-#             text += page.extract_text()
-#     return text
-
-
-# def get_text_chunks(text):
-#     text_splitter = CharacterTextSplitter(
-#         separator="\n",
-#         chunk_size=200,
-#         chunk_overlap=20,
-#         length_function=len
-#     )
-#     chunks = text_splitter.split_text(text)
-#     return chunks
-
-
-# def get_vectorstore(text_chunks):
-#     # embeddings = OpenAIEmbeddings()
-#     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-#     embeddings = HuggingFaceEmbeddings()
-#     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-#     return vectorstore
-
-
-# def get_conversation_chain(vectorstore):
-#     # llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
-#     # tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
-#     # model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
-
-#     llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", huggingfacehub_api_token=hub_token, model_kwargs={"temperature":0.5, "max_length":20})
-
-#     memory = ConversationBufferMemory(
-#         memory_key='chat_history', return_messages=True)
-#     conversation_chain = ConversationalRetrievalChain.from_llm(
-#         llm=llm,
-#         retriever=vectorstore.as_retriever(),
-#         memory=memory
-#     )
-#     return conversation_chain
-
-
-# def handle_userinput(user_question):
-#     response = st.session_state.conversation
-#     reply = response.run(user_question)
-#     st.write(reply)
-#     # st.session_state.chat_history = response['chat_history']
-
-#     # for i, message in enumerate(st.session_state.chat_history):
-#     #     if i % 2 == 0:
-#     #         st.write(user_template.replace(
-#     #             "{{MSG}}", message.content), unsafe_allow_html=True)
-#     #     else:
-#     #         st.write(bot_template.replace(
-#     #             "{{MSG}}", message.content), unsafe_allow_html=True)
-
-
-# def main():
-#     load_dotenv()
-#     st.set_page_config(page_title="Chat with multiple PDFs",
-#                        page_icon=":books:")
-#     st.write(css, unsafe_allow_html=True)
-
-#     if "conversation" not in st.session_state:
-#         st.session_state.conversation = None
-#     if "chat_history" not in st.session_state:
-#         st.session_state.chat_history = None
-
-#     st.header("Chat with multiple PDFs :books:")
-#     user_question = st.text_input("Ask a question about your documents:")
-#     if user_question:
-#         handle_userinput(user_question)
-
-#     with st.sidebar:
-#         st.subheader("Your documents")
-#         pdf_docs = st.file_uploader(
-#             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
-#         if st.button("Process"):
-#             if(len(pdf_docs) == 0):
-#                 st.error("Please upload at least one PDF")
-#             else:
-#                 with st.spinner("Processing"):
-#                     # get pdf text
-#                     raw_text = get_pdf_text(pdf_docs)
-
-#                     # get the text chunks
-#                     text_chunks = get_text_chunks(raw_text)
-
-#                     # create vector store
-#                     vectorstore = get_vectorstore(text_chunks)
-
-#                     # create conversation chain
-#                     st.session_state.conversation = get_conversation_chain(
-#                         vectorstore)
-
-# if __name__ == '__main__':
-#     main()
-
-
-
-
-
-
-# # import os
-# # import getpass
-# # import streamlit as st
-# # from langchain.document_loaders import PyPDFLoader
-# # from langchain.text_splitter import RecursiveCharacterTextSplitter
-# # from langchain.embeddings import HuggingFaceEmbeddings
-# # from langchain.vectorstores import Chroma
-# # from langchain import HuggingFaceHub
-# # from langchain.chains import RetrievalQA
-# # # __import__('pysqlite3')
-# # # import sys
-# # # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-
-
-# # # load huggingface api key
-# # hubtok = os.environ["HUGGINGFACE_HUB_TOKEN"]
-
-# # # use streamlit file uploader to ask user for file
-# # # file = st.file_uploader("Upload PDF")
-
-
-# # path = "Geeta.pdf"
-# # loader = PyPDFLoader(path)
-# # pages = loader.load()
-
-# # # st.write(pages)
-
-# # splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
-# # docs = splitter.split_documents(pages)
-
-# # embeddings = HuggingFaceEmbeddings()
-# # doc_search = Chroma.from_documents(docs, embeddings)
-
-# # repo_id = "tiiuae/falcon-7b"
-# # llm = HuggingFaceHub(repo_id=repo_id, huggingfacehub_api_token=hubtok, model_kwargs={'temperature': 0.2,'max_length': 1000})
-
-# # from langchain.schema import retriever
-# # retireval_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=doc_search.as_retriever())
-
-# # if query := st.chat_input("Enter a question: "):
-# #     with st.chat_message("assistant"):
-# #         st.write(retireval_chain.run(query))
+
+
+
+import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+# from langchain.chat_models import ChatOpenAI
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from htmlTemplates import css, bot_template, user_template
+from langchain.llms import HuggingFaceHub
+import os
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+# from langchain.callbacks import get_openai_callback
+
+hub_token = os.environ["HUGGINGFACE_HUB_TOKEN"]
+
+def get_pdf_text(pdf_docs):
+    text = ""
+    for pdf in pdf_docs:
+        pdf_reader = PdfReader(pdf)
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+    return text
+
+
+def get_text_chunks(text):
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=200,
+        chunk_overlap=20,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+
+
+def get_vectorstore(text_chunks):
+    # embeddings = OpenAIEmbeddings()
+    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
+    embeddings = HuggingFaceEmbeddings()
+    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vectorstore
+
+
+def get_conversation_chain(vectorstore):
+    # llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k")
+    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
+    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
+
+    llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-v0.1", huggingfacehub_api_token=hub_token, model_kwargs={"temperature":0.5, "max_length":20})
+
+    memory = ConversationBufferMemory(
+        memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectorstore.as_retriever(),
+        memory=memory
+    )
+    return conversation_chain
+
+
+def handle_userinput(user_question):
+    response = st.session_state.conversation
+    reply = response.run(user_question)
+    st.write(reply)
+    st.session_state.chat_history = response['chat_history']
+
+    for i, message in enumerate(st.session_state.chat_history):
+        if i % 2 == 0:
+            st.write(user_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+        else:
+            st.write(bot_template.replace(
+                "{{MSG}}", message.content), unsafe_allow_html=True)
+
+
+def main():
+    load_dotenv()
+    st.set_page_config(page_title="Chat with multiple PDFs",
+                       page_icon=":books:")
+    st.write(css, unsafe_allow_html=True)
+
+    if "conversation" not in st.session_state:
+        st.session_state.conversation = None
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = None
+
+    st.header("Chat with multiple PDFs :books:")
+    user_question = st.text_input("Ask a question about your documents:")
+    if user_question:
+        handle_userinput(user_question)
+
+    with st.sidebar:
+        st.subheader("Your documents")
+        pdf_docs = st.file_uploader(
+            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+        if st.button("Process"):
+            if(len(pdf_docs) == 0):
+                st.error("Please upload at least one PDF")
+            else:
+                with st.spinner("Processing"):
+                    # get pdf text
+                    raw_text = get_pdf_text(pdf_docs)
+
+                    # get the text chunks
+                    text_chunks = get_text_chunks(raw_text)
+
+                    # create vector store
+                    vectorstore = get_vectorstore(text_chunks)
+
+                    # create conversation chain
+                    st.session_state.conversation = get_conversation_chain(
+                        vectorstore)
+
+if __name__ == '__main__':
+    main()
+
+
+
+
+
+
+# import os
+# import getpass
+# import streamlit as st
+# from langchain.document_loaders import PyPDFLoader
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain.vectorstores import Chroma
+# from langchain import HuggingFaceHub
+# from langchain.chains import RetrievalQA
+# # __import__('pysqlite3')
+# # import sys
+# # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+
+
+# # load huggingface api key
+# hubtok = os.environ["HUGGINGFACE_HUB_TOKEN"]
+
+# # use streamlit file uploader to ask user for file
+# # file = st.file_uploader("Upload PDF")
+
+
+# path = "Geeta.pdf"
+# loader = PyPDFLoader(path)
+# pages = loader.load()
+
+# # st.write(pages)
+
+# splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
+# docs = splitter.split_documents(pages)
+
+# embeddings = HuggingFaceEmbeddings()
+# doc_search = Chroma.from_documents(docs, embeddings)
+
+# repo_id = "tiiuae/falcon-7b"
+# llm = HuggingFaceHub(repo_id=repo_id, huggingfacehub_api_token=hubtok, model_kwargs={'temperature': 0.2,'max_length': 1000})
+
+# from langchain.schema import retriever
+# retireval_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=doc_search.as_retriever())
+
+# if query := st.chat_input("Enter a question: "):
+#     with st.chat_message("assistant"):
+#         st.write(retireval_chain.run(query))
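
Review notes on the newly active code:

1. Ingestion: `main()` wires `get_pdf_text` → `get_text_chunks` → `get_vectorstore` → `get_conversation_chain`. The first two stages can be sanity-checked outside Streamlit — a standalone sketch, assuming a local `sample.pdf` (placeholder name) and the same splitter settings as `get_text_chunks`:

```python
# Standalone sketch of the PDF -> chunks path (PyPDF2 + CharacterTextSplitter).
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

text = ""
for page in PdfReader("sample.pdf").pages:  # placeholder file name
    text += page.extract_text()

splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,      # same values get_text_chunks() uses
    chunk_overlap=20,
    length_function=len,
)
chunks = splitter.split_text(text)
print(f"{len(chunks)} chunks")
```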
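2. `get_vectorstore` returns a FAISS index, so retrieval can be spot-checked directly with `similarity_search` before involving an LLM. A sketch with toy chunks (placeholder strings):

```python
# Query the same kind of FAISS index get_vectorstore() builds.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

chunks = [
    "Chapter 2 teaches the immortality of the self.",  # placeholder chunks
    "Chapter 12 describes the path of devotion.",
]
store = FAISS.from_texts(texts=chunks, embedding=HuggingFaceEmbeddings())
for doc in store.similarity_search("What does chapter 2 teach?", k=1):
    print(doc.page_content)
```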
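3. In `get_conversation_chain`, `model_kwargs={"temperature":0.5, "max_length":20}` caps generation at roughly 20 tokens, which will truncate most answers; the T5 `tokenizer`/`model` pair is loaded but never used; and `get_vectorstore` now references `HuggingFaceInstructEmbeddings` without importing it, so that line should raise `NameError` before the value is overwritten on the next line. A sketch of the LLM construction with a roomier limit — `max_length=512` is an assumed value, not something this PR sets:

```python
# Same HuggingFaceHub construction, with an assumed larger generation budget.
import os
from langchain.llms import HuggingFaceHub

llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    huggingfacehub_api_token=os.environ["HUGGINGFACE_HUB_TOKEN"],
    model_kwargs={"temperature": 0.5, "max_length": 512},  # assumed value
)
```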
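4. `handle_userinput` calls `response.run(...)` and then `response['chat_history']`, but `response` is the chain object itself and chains are not subscriptable, so the indexing should raise `TypeError`. One possible fix, sketched under the assumption that the `ConversationBufferMemory(return_messages=True)` configured above is attached to the chain:

```python
# Sketch: read history from the chain's memory instead of indexing the chain.
def handle_userinput(user_question):
    chain = st.session_state.conversation
    st.write(chain.run(user_question))
    # with return_messages=True the turns are stored as message objects
    st.session_state.chat_history = chain.memory.chat_memory.messages
```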
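5. The commented block at the bottom of the new file is an alternative single-PDF pipeline (Chroma + Falcon through `RetrievalQA`). Condensed into a runnable sketch for comparison — all names and values come from those comments, the query string is a placeholder:

```python
# Condensed sketch of the commented-out Chroma/Falcon RetrievalQA variant.
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

pages = PyPDFLoader("Geeta.pdf").load()  # path used in the comments
docs = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=20).split_documents(pages)
doc_search = Chroma.from_documents(docs, HuggingFaceEmbeddings())

llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b",
    huggingfacehub_api_token=os.environ["HUGGINGFACE_HUB_TOKEN"],
    model_kwargs={"temperature": 0.2, "max_length": 1000},
)
qa_chain = RetrievalQA.from_chain_type(
    llm, chain_type="stuff", retriever=doc_search.as_retriever())
print(qa_chain.run("What does the Gita say about duty?"))  # placeholder query
```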
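The `RetrievalQA` variant is stateless (each query stands alone), whereas the active `ConversationalRetrievalChain` threads `chat_history` through the memory object, so follow-up questions can refer back to earlier turns; that is the main trade-off between the two pipelines kept in this file.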