tofuliang committed on
Commit
c5faff0
1 Parent(s): c0f8580
Files changed (3) hide show
  1. app.py +178 -0
  2. requirements.txt +8 -0
  3. 边城.txt +0 -0
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library
import os
import re

# Third-party: Gradio UI framework
import gradio as gr
# LangChain: chat model, math chain, message schema, tools, vector store
from langchain.chat_models import ChatOpenAI
from langchain import LLMMathChain
from langchain.schema import HumanMessage, AIMessage
from langchain.utilities import GoogleSerperAPIWrapper, WikipediaAPIWrapper
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# Shared embedding function, used both to build and to query the Chroma store.
embedding = OpenAIEmbeddings()
# On-disk directory where the Chroma vector store is persisted between runs.
persistDirectory = 'db'
def initialize():
    """Build the conversational agent: a chat LLM wired to calculator,
    wikipedia and web-search tools, with buffered conversation memory."""
    from langchain.memory import ConversationBufferMemory

    chatLlm = ChatOpenAI(temperature=0)

    mathChain = LLMMathChain.from_llm(llm=chatLlm)
    webSearch = GoogleSerperAPIWrapper()
    wikipedia = WikipediaAPIWrapper(top_k_results=1)

    # Each Tool pairs a callable with the description the agent uses to
    # decide when to route a question to it.
    toolbox = [
        Tool(
            name="Calculator",
            func=mathChain.run,
            description="useful for when you need to answer questions about math",
        ),
        Tool(
            name="wikipedia",
            func=wikipedia.run,
            description="useful for when you need to answer questions about historical entity. the input to this should be a single search term.",
        ),
        Tool(
            name="Current Search",
            func=webSearch.run,
            description="useful for when you need to answer questions about current events or the current state of the world, also useful if there is no wikipedia result. the input to this should be a single search term.",
        ),
    ]

    chatMemory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    return initialize_agent(
        toolbox,
        chatLlm,
        agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
        verbose=True,
        memory=chatMemory,
    )
# Module-level agent instance shared by every chatWithAgent() call.
chatbotEngine = initialize()
def chatWithAgent(chatHistory, message=""):
    """Send *message* to the shared tool-using agent and append the exchange
    to *chatHistory*.

    Returns (chatbot value, state value, cleared textbox text).
    """
    trimmed = message.strip()
    # Blank submissions are ignored; history is returned unchanged and the
    # textbox is cleared.
    if trimmed:
        try:
            answer = chatbotEngine.run(trimmed)
        except ValueError:
            answer = "I can't handle this request, please try something else."
        # NOTE: the untrimmed message is what gets displayed in the chat.
        chatHistory.append((message, answer))
    return chatHistory, chatHistory, ""
def chatWithOpenAI(question, chatHistory):
    """Answer *question* with a plain ChatOpenAI model, replaying the whole
    prior conversation so the model keeps context.

    Parameters
    ----------
    question : str
        The new user message.
    chatHistory : list[tuple[str, str]]
        Prior (human_text, ai_text) exchanges (the gradio Chatbot value);
        mutated in place with the new exchange appended.

    Returns ("", updated history) so the textbox is cleared.
    """
    chatOpenAi = ChatOpenAI()
    # Rebuild the full message transcript. BUG FIX: the original iterated
    # over chatHistory[0] (only the FIRST exchange tuple), so every turn
    # after the first was silently dropped from the model's context.
    messages = []
    for humanText, aiText in chatHistory:
        messages.append(HumanMessage(content=humanText))
        messages.append(AIMessage(content=aiText))
    messages.append(HumanMessage(content=question))
    ans = chatOpenAi(messages)
    chatHistory.append((question, ans.content))
    return "", chatHistory
def splitParagraph(text, pdf_name, maxLength=300):
    """Split raw text into langchain Documents of at most *maxLength*
    characters, cutting only at sentence boundaries.

    Parameters
    ----------
    text : str
        Raw document text; newlines are stripped and whitespace squeezed.
    pdf_name : str
        Stored as each Document's "source" metadata value.
    maxLength : int
        Soft cap on paragraph length in characters (a single over-long
        sentence still becomes its own paragraph).

    Returns a list of Document objects.
    """
    # Collapse whitespace: drop newlines, then squeeze runs of whitespace.
    # (The original also did a dead replace('\n\n', '') AFTER removing all
    # single '\n', which could never match.)
    text = text.replace('\n', '')
    text = re.sub(r'\s+', ' ', text)

    # Split on Chinese/ASCII sentence-ending punctuation, keeping each
    # delimiter via the capture group. BUG FIX: the original alternation
    # '(;|。|!|\!|\.|?|\?)' contains bare '?'/'!' branches, which is an
    # invalid regex ("nothing to repeat"); a character class is valid and
    # covers the same delimiters.
    pieces = re.split(r'([;。!!.??])', text)

    # Re-attach each delimiter to the sentence that precedes it.
    sentences = [pieces[2 * i] + pieces[2 * i + 1] for i in range(len(pieces) // 2)]
    if len(pieces) % 2 == 1:
        # Trailing text with no closing delimiter.
        sentences.append(pieces[-1])

    # Greedily pack whole sentences into paragraphs of <= maxLength chars.
    paragraphs = []
    current = ""
    for sentence in sentences:
        if len(current) + len(sentence) <= maxLength:
            current += sentence
        else:
            paragraphs.append(current.strip())
            current = sentence
    paragraphs.append(current.strip())

    # One Document per paragraph. Each gets its OWN metadata dict: the
    # original shared a single dict across all documents, so mutating one
    # document's metadata would have leaked into every other.
    return [
        Document(page_content=paragraph, metadata={"source": pdf_name})
        for paragraph in paragraphs
    ]
def askWithEmbedding(question, chatHistory):
    """Answer *question* from the persisted Chroma vector store via a
    stuff-type QA-with-sources chain, appending the exchange to
    *chatHistory*. Returns ("", updated history) to clear the textbox."""
    # Blank question: nothing to do, just clear the textbox.
    if not question.strip():
        return "", chatHistory

    # Reopen the persisted store and retrieve with maximal marginal relevance.
    store = Chroma(persist_directory=persistDirectory, embedding_function=embedding)
    relevantDocs = store.as_retriever(search_type="mmr").get_relevant_documents(question)

    qaChain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")
    answer = qaChain(
        {"input_documents": relevantDocs, "question": question},
        return_only_outputs=True,
    )['output_text']

    chatHistory.append((question, answer))
    return "", chatHistory
with gr.Blocks() as demo:
    # --- Panel 1: plain ChatOpenAI conversation (no tools) ---
    gr.Markdown("""<h1><center>Chat with OpenAI!</center></h1>""")
    chatWithOpenAIHistory = gr.State([])
    chatWithOpenAIChatBot = gr.Chatbot()
    chatWithOpenAIMessage = gr.Textbox()
    chatWithOpenAISubmit = gr.Button("SEND")
    chatWithOpenAISubmit.click(chatWithOpenAI, inputs=[chatWithOpenAIMessage, chatWithOpenAIChatBot],
                               outputs=[chatWithOpenAIMessage, chatWithOpenAIChatBot])

    # --- Panel 2: tool-using agent (search / wikipedia / calculator) ---
    gr.Markdown("""<h1><center>Chat with your online-connected bot!</center></h1>""")
    chatWithAgentHistory = gr.State([])
    chatbot = gr.Chatbot()
    message = gr.Textbox()
    submit = gr.Button("SEND")
    submit.click(chatWithAgent, inputs=[chatWithAgentHistory, message],
                 outputs=[chatbot, chatWithAgentHistory, message])

    # --- Panel 3: embedding-backed QA over the bundled novel ---
    gr.Markdown("""<h1><center>Ask anything about 《边城》!</center></h1>""")
    embeddingChatHistory = gr.State([])
    embeddingChatBot = gr.Chatbot()
    embeddingMessage = gr.Textbox()
    embeddingSubmit = gr.Button("ASK")
    embeddingSubmit.click(askWithEmbedding, inputs=[embeddingMessage, embeddingChatBot],
                          outputs=[embeddingMessage, embeddingChatBot])

# Build the vector store once; later runs reuse the persisted 'db' directory.
if not os.path.exists(persistDirectory):
    # BUG FIX: explicit encoding — the novel is Chinese text, and relying on
    # the platform's default locale encoding would crash (or mis-decode) on
    # non-UTF-8 systems.
    with open("./边城.txt", encoding="utf-8") as f:
        state_of_the_union = f.read()
    documents = splitParagraph(state_of_the_union, "边城.txt")
    vectordb = Chroma.from_documents(documents=documents, embedding=embedding,
                                     persist_directory=persistDirectory)
    vectordb.persist()
    # Drop the reference so the store is flushed/closed before serving.
    vectordb = None

if __name__ == "__main__":
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai==0.27.7
2
+ langchain==0.0.179
3
+ gradio==3.30.0
4
+ python-dotenv
5
+ wikipedia
6
+ httpcore==0.15
7
+ chromadb
8
+ tiktoken
边城.txt ADDED
The diff for this file is too large to render. See raw diff