sartajbhuvaji committed
Commit 180715b • 1 Parent(s): b300016

Upload 3 files

Files changed (3)
  1. app.py +116 -0
  2. html_template.py +38 -0
  3. requirements.txt +11 -0
app.py CHANGED
@@ -0,0 +1,116 @@
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
+ from html_template import css, bot_template, user_template
+ from langchain.llms import HuggingFaceHub
+ import os
+
+ FREE_RUN = False
+
+ def get_pdf_text(pdf_docs):
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text() or ""  # extract_text() can return None for image-only pages
+     return text
+
+ def get_text_chunks(text):
+     text_splitter = CharacterTextSplitter(
+         separator="\n",
+         chunk_size=1000,
+         chunk_overlap=200,
+         length_function=len
+     )
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ def get_vector_store(text_chunks):
+     embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl") if FREE_RUN else OpenAIEmbeddings()
+     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+     return vectorstore
+
+ def get_conversation_chain(vectorstore):
+     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={
+         "temperature": 0.5, "max_length": 512}) if FREE_RUN else ChatOpenAI()
+     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+     conversation_chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=vectorstore.as_retriever(),
+         memory=memory
+     )
+     return conversation_chain
+
+ def handle_userinput(user_question):
+     response = st.session_state.conversation({'question': user_question})
+     st.session_state.chat_history = response['chat_history']
+
+     for i, message in enumerate(st.session_state.chat_history):
+         if i % 2 == 0:
+             st.write(user_template.replace(
+                 "{{MSG}}", message.content), unsafe_allow_html=True)
+         else:
+             st.write(bot_template.replace(
+                 "{{MSG}}", message.content), unsafe_allow_html=True)
+
+ def main():
+     st.set_page_config(page_title="WhisperChain 🔗", page_icon=":link:")
+     st.write(css, unsafe_allow_html=True)
+
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = None
+     if "chat_history" not in st.session_state:
+         st.session_state.chat_history = None
+
+     st.header("WhisperChain 🔗")
+     user_question = st.text_input("Ask a question about your documents.")
+
+     if user_question and st.session_state.conversation:  # answer only once a chain has been built
+         handle_userinput(user_question)
+
+     with st.sidebar:
+
+         ###
+         OPENAI_API_KEY = st.sidebar.text_input("Enter OpenAI API Key", type="password")
+         HUGGINGFACEHUB_API_KEY = st.sidebar.text_input("Enter Hugging Face API Key", type="password")
+
+         if not OPENAI_API_KEY or not HUGGINGFACEHUB_API_KEY:
+             st.sidebar.error("Please enter your API keys")
+             st.stop()
+
+         os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
+         os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_KEY  # env var name read by HuggingFaceHub
+
+         # Toggle free run
+         global FREE_RUN
+         FREE_RUN = st.sidebar.checkbox("Free run", value=False)
+         ###
+
+         pdf_docs = st.file_uploader(
+             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+
+         if st.button("Process"):
+             if pdf_docs:
+                 with st.spinner("Processing"):
+
+                     # get pdf text
+                     raw_text = get_pdf_text(pdf_docs)
+
+                     # get the text chunks
+                     text_chunks = get_text_chunks(raw_text)
+
+                     # create vector store
+                     vector_store = get_vector_store(text_chunks)
+
+                     # create conversation chain
+                     st.session_state.conversation = get_conversation_chain(vector_store)
+             else:
+                 st.error("Please upload at least one PDF")
+
+ if __name__ == '__main__':
+     main()
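
For reference, a minimal sketch (not part of this commit) of how the pipeline above could be exercised outside Streamlit. It assumes OPENAI_API_KEY is already exported, FREE_RUN is left at False, and "sample.pdf" is a hypothetical local file; it reuses only the functions defined in app.py.

```python
# Sketch: drive the app.py pipeline from a plain script (assumptions noted above).
from app import get_pdf_text, get_text_chunks, get_vector_store, get_conversation_chain

with open("sample.pdf", "rb") as f:          # hypothetical file name
    raw_text = get_pdf_text([f])             # PdfReader accepts file-like objects

chunks = get_text_chunks(raw_text)           # ~1000-char chunks with 200-char overlap
store = get_vector_store(chunks)             # FAISS index over OpenAI embeddings
chain = get_conversation_chain(store)        # ConversationalRetrievalChain with buffer memory

result = chain({"question": "What is this document about?"})
print(result["answer"])                      # chat history is tracked in the chain's memory
```
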
html_template.py ADDED
@@ -0,0 +1,38 @@
+ css = '''
+ <style>
+ .chat-message {
+     padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
+ }
+ .chat-message.user {
+     background-color: #2b313e
+ }
+ .chat-message.bot {
+     background-color: #475063
+ }
+ .chat-message .avatar {
+     width: 20%;
+ }
+ .chat-message .avatar img {
+     max-width: 78px;
+     max-height: 78px;
+     border-radius: 50%;
+     object-fit: cover;
+ }
+ .chat-message .message {
+     width: 80%;
+     padding: 0 1.5rem;
+     color: #fff;
+ }
+ </style>'''
+
+ bot_template = '''
+ <div class="chat-message bot">
+     <div class="message">{{MSG}}</div>
+ </div>
+ '''
+
+ user_template = '''
+ <div class="chat-message user">
+     <div class="message">{{MSG}}</div>
+ </div>
+ '''
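
As a quick illustration (a sketch, not part of this commit), the templates above are plain strings whose {{MSG}} placeholder is substituted before rendering, which is exactly how app.py uses them:

```python
# Sketch: filling and rendering the chat templates, mirroring handle_userinput() in app.py.
import streamlit as st
from html_template import css, user_template, bot_template

st.write(css, unsafe_allow_html=True)  # inject the chat styles once per page
st.write(user_template.replace("{{MSG}}", "Hi there!"), unsafe_allow_html=True)
st.write(bot_template.replace("{{MSG}}", "Hello! Upload a PDF to get started."), unsafe_allow_html=True)
```
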
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ streamlit
+ pypdf2
+ langchain
+ python-dotenv
+ faiss-cpu
+ openai
+ huggingface_hub
+ tiktoken
+ InstructorEmbedding
+ sentence_transformers