suthanhcong committed
Commit 06e8171 · 1 Parent(s): f906763

Demo Txt and PDF

Files changed (2)
  1. app.py +182 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,182 @@
+ import streamlit as st
+ import os
+ # import openai
+ from io import StringIO
+ from langchain.chat_models import ChatOpenAI
+ from langchain import OpenAI, LLMChain, PromptTemplate
+ from langchain.memory import ConversationBufferWindowMemory
+
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import TextLoader
+ from langchain.document_loaders import PyPDFLoader
+ # from langchain.chains import ConversationalRetrievalChain
+ from langchain.chains.summarize import load_summarize_chain
+ import tempfile
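+
+ # Session-state defaults: "file_uploader_key" is bumped later to force the uploader widget to reset; "uploaded_files" remembers the last upload across reruns.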
+ if "file_uploader_key" not in st.session_state:
+     st.session_state["file_uploader_key"] = 0
+
+ if "uploaded_files" not in st.session_state:
+     st.session_state["uploaded_files"] = []
+
+ # Prompt template for the chat chain
+ template = """You are a chatbot having a conversation with a human.
+
+ Given the following extracted parts of a long document and a question, create a final answer.
+
+ {context}
+
+ {chat_history}
+ Human: {human_input}
+ Chatbot:"""
+
+ # Init prompt
+ prompt = PromptTemplate(
+     input_variables=["chat_history", "human_input", "context"], template=template
+ )
+
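+ # The chat UI lives in one container; everything below the API-key field only runs once a key has been entered.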
+ a = st.container()
+ with a:
+     st.title("CHATBOT")
+     global openai_api_key
+     openai_api_key = st.text_input('OpenAI API Key', type='password')
+
+ if openai_api_key:
+     @st.cache_resource
+     def llm():
+         model = OpenAI(temperature=0.0, openai_api_key=openai_api_key)
+         embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
+         return model, embedding
+
+     llm, embedding = llm()
+
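+     # chain() builds an LLMChain with a 3-turn sliding-window memory; st.cache_resource reuses the same chain (and its memory) across reruns.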
+     @st.cache_resource
+     def chain():
+         global memory
+         memory = ConversationBufferWindowMemory(memory_key="chat_history", input_key="human_input", return_messages=True, k=3)
+         chain = LLMChain(
+             llm=llm, prompt=prompt, memory=memory
+         )
+
+         return chain
+
+     global llm_chain
+     llm_chain = chain()
+
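+     # Summarization uses a map_reduce chain: each chunk is summarized with summarize_PROMPT, then the partial summaries are combined into one answer.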
+     summarize_template = """Write a concise summary of the given documents:
+ {text}"""
+     summarize_PROMPT = PromptTemplate(template=summarize_template, input_variables=["text"])
+     llm_summarize = load_summarize_chain(llm=llm, chain_type="map_reduce", map_prompt=summarize_PROMPT)
+     # chain({"input_documents": docs}, return_only_outputs=True)
+     # llm_summarize = load_summarize_chain(llm, chain_type="map_reduce")
+
+
+     ########################################
+     ######## CHATBOT interface #############
+     ########################################
+     # Initialize chat history
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+     # Display chat messages from history on app rerun
+     with a:
+         for message in st.session_state.messages:
+             with st.chat_message(message["role"]):
+                 st.markdown(message["content"])
+
+     global documents
+     documents = []
+
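+     # Sidebar upload pipeline: save .pdf/.txt uploads under docs/, split them into chunks, and index the chunks in an in-memory Chroma store.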
+     with st.sidebar:
+         uploaded_files = st.file_uploader("Upload file", accept_multiple_files=True,
+                                           key=st.session_state["file_uploader_key"],
+                                           type=['txt', 'pdf']
+                                           # on_change = check
+                                           )
+
+         if uploaded_files:
+             # files = set([file.name for file in uploaded_files])
+             st.session_state["uploaded_files"] = uploaded_files
+             text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=10, separators=[" ", ",", "\n"])
+             os.makedirs("docs", exist_ok=True)  # make sure the local docs/ folder exists before saving uploads
+             for file in uploaded_files:
+                 if file.name.endswith(".pdf"):
+                     # Save the uploaded file to a temporary location
+                     temp_file_path = os.path.join('docs', file.name)
+                     with open(temp_file_path, "wb") as temp_file:
+                         temp_file.write(file.read())
+                     loader = PyPDFLoader(temp_file_path)
+                     # loader = loader.load()
+                 elif file.name.endswith('.txt'):
+                     # To read file as bytes:
+                     bytes_data = file.getvalue()
+                     # To convert to a string based IO:
+                     stringio = StringIO(file.getvalue().decode("utf-8"))
+                     # To read file as string:
+                     loader = stringio.read()
+                     filename = os.path.join("docs", 'text.txt')
+                     # filename = 'docs/text.txt'
+                     with open(filename, "wb") as f:
+                         f.write(file.getbuffer())
+                     loader = TextLoader(filename, autodetect_encoding=True)
+                 loader = loader.load()
+                 documents.extend(loader)
+             documents = text_splitter.split_documents(documents)
+
+             # Embedding
+             global docsearch
+             docsearch = Chroma.from_documents(documents,
+                                               embedding=embedding)
+
+
+     ########################################
+     ############### SIDEBAR ################
+     ########################################
+
+     # Reset the conversation: clear the visible history and the chain's memory, and bump the uploader key so the file uploader is rebuilt empty.
+     def clear_msg():
+         st.session_state.messages = []
+         llm_chain.memory.clear()
+         st.session_state["file_uploader_key"] += 1
+
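+     # "Summarize" runs the map_reduce chain over all uploaded chunks and records the exchange in both the window memory and the visible chat history.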
+     if uploaded_files:
+         if st.sidebar.button('Summarize'):
+             with a:
+                 query = 'Summarize uploaded documents'
+                 st.chat_message("user").markdown(query)
+                 llm_chain.memory.chat_memory.add_user_message(query)
+                 # Add user message to chat history
+                 st.session_state.messages.append({"role": "user", "content": query})
+                 response = llm_summarize.run(documents)
+                 # chain({"input_documents": docs}, return_only_outputs=True)
+
+                 with st.chat_message("assistant"):
+                     st.markdown(response)
+                 llm_chain.memory.chat_memory.add_ai_message(response)
+                 # Add assistant response to chat history
+                 st.session_state.messages.append({"role": "assistant", "content": response})
+
+     st.sidebar.button("Clear", on_click=clear_msg)
+
+     ########################################
+     ######## React to user input ###########
+     ########################################
+
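+     # RAG answer flow: retrieve the most similar chunks for the question and pass them to the chat chain as {context}; without uploads the chain answers from chat history alone.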
+     with a:
+         if query := st.chat_input():
+             # Display user message in chat message container
+             st.chat_message("user").markdown(query)
+             # Add user message to chat history
+             st.session_state.messages.append({"role": "user", "content": query})
+             if documents:
+                 docs = docsearch.similarity_search(query)
+             else:
+                 docs = 'No context provided.'
+             response = llm_chain.run({"context": docs, "human_input": query})
+             # Display assistant response in chat message container
+             with st.chat_message("assistant"):
+                 st.markdown(response)
+             # Add assistant response to chat history
+             st.session_state.messages.append({"role": "assistant", "content": response})
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ openai
+ langchain
+ tiktoken
+ chromadb
+ pypdf
+ chardet