Spaces:
Runtime error
Runtime error
init
Browse files- app.py +178 -0
- requirements.txt +8 -0
- 边城.txt +0 -0
app.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
from langchain.chat_models import ChatOpenAI
|
6 |
+
from langchain import LLMMathChain
|
7 |
+
from langchain.schema import HumanMessage, AIMessage
|
8 |
+
from langchain.utilities import GoogleSerperAPIWrapper, WikipediaAPIWrapper
|
9 |
+
from langchain.agents import initialize_agent, Tool, AgentType
|
10 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
11 |
+
from langchain.vectorstores import Chroma
|
12 |
+
from langchain.docstore.document import Document
|
13 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
14 |
+
from langchain.llms import OpenAI
|
15 |
+
|
16 |
+
# Shared OpenAI embedding model, used both when building the Chroma index
# and when embedding queries at answer time (reads OPENAI_API_KEY from env).
embedding = OpenAIEmbeddings()
# On-disk directory where the persisted Chroma vector store lives.
persistDirectory = 'db'
|
18 |
+
|
19 |
+
|
20 |
+
def initialize():
    """Build and return a conversational ReAct agent.

    The agent is backed by a temperature-0 chat model and equipped with
    three tools: an LLM math chain, a Wikipedia lookup, and a Serper web
    search. Conversation state lives in a buffer memory keyed
    "chat_history", as the CHAT_CONVERSATIONAL_REACT_DESCRIPTION agent
    expects.
    """
    from langchain.memory import ConversationBufferMemory

    chat_llm = ChatOpenAI(temperature=0)

    math_chain = LLMMathChain.from_llm(llm=chat_llm)
    wiki_wrapper = WikipediaAPIWrapper(top_k_results=1)
    serper = GoogleSerperAPIWrapper()

    calculator_tool = Tool(
        name="Calculator",
        func=math_chain.run,
        description="useful for when you need to answer questions about math"
    )
    wiki_tool = Tool(
        name="wikipedia",
        func=wiki_wrapper.run,
        description="useful for when you need to answer questions about historical entity. the input to this should be a single search term."
    )
    search_tool = Tool(
        name="Current Search",
        func=serper.run,
        description="useful for when you need to answer questions about current events or the current state of the world, also useful if there is no wikipedia result. the input to this should be a single search term."
    )

    # return_messages=True keeps history as message objects rather than a
    # flat string, which the conversational chat agent requires.
    history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    return initialize_agent(
        [calculator_tool, wiki_tool, search_tool],
        chat_llm,
        agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
        verbose=True,
        memory=history)
|
55 |
+
|
56 |
+
|
57 |
+
# Module-level agent instance shared by every chatWithAgent call; its buffer
# memory therefore accumulates across all users of this process.
chatbotEngine = initialize()
|
58 |
+
|
59 |
+
|
60 |
+
def chatWithAgent(chatHistory, message=""):
    """Route one user message through the tool-using agent.

    Returns (history, history, "") so Gradio can refresh both the chatbot
    widget and the history state while clearing the input textbox.
    """
    text = message.strip()
    # Ignore blank / whitespace-only submissions without touching history.
    if not text:
        return chatHistory, chatHistory, ""

    try:
        answer = chatbotEngine.run(text)
    except ValueError:
        # The agent raises ValueError when it cannot parse its own output;
        # degrade to a friendly message instead of crashing the UI.
        answer = "I can't handle this request, please try something else."

    chatHistory.append((message, answer))
    return chatHistory, chatHistory, ""
|
71 |
+
|
72 |
+
|
73 |
+
def chatWithOpenAI(question, chatHistory):
    """Answer *question* with a plain ChatOpenAI call, replaying the whole
    prior conversation so the model keeps context.

    Args:
        question: the new user message.
        chatHistory: list of (user_text, bot_text) pairs (Gradio chatbot value).

    Returns:
        ("", chatHistory): empty string clears the textbox, the updated
        history refreshes the chatbot widget.
    """
    chatOpenAi = ChatOpenAI()
    messages = []
    # BUG FIX: the original looped over chatHistory[0] — only the FIRST
    # (user, bot) pair — so every exchange after the first was silently
    # dropped from the context. Replay every pair instead.
    for userText, botText in chatHistory:
        messages.append(HumanMessage(content=userText))
        messages.append(AIMessage(content=botText))
    messages.append(HumanMessage(content=question))
    ans = chatOpenAi(messages)
    chatHistory.append((question, ans.content))
    return "", chatHistory
|
88 |
+
|
89 |
+
def splitParagraph(text, pdf_name, maxLength=300):
    """Split raw book text into Documents of at most ~maxLength characters,
    cutting only at sentence boundaries (Chinese or ASCII end punctuation).

    Args:
        text: full plain text of the source file.
        pdf_name: stored as the "source" metadata of every Document.
        maxLength: soft cap on characters per paragraph; a single sentence
            longer than this still becomes its own paragraph.

    Returns:
        list of langchain Documents, one per packed paragraph.
    """
    # Collapse newlines and runs of whitespace into single spaces.
    # (The original also ran replace('\n\n', '') AFTER replace('\n', ''),
    # which was dead code — no '\n' can remain at that point.)
    text = re.sub(r'\s+', ' ', text.replace('\n', ''))

    # Split after each sentence-ending mark; the capture group keeps each
    # mark as its own list item so it can be re-attached below. Raw string
    # is required: '\!' and '\?' are invalid escapes in a plain str literal
    # (SyntaxWarning on Python 3.12+).
    pieces = re.split(r'(;|。|!|!|\.|?|\?)', text)

    # Re-attach each punctuation mark to the sentence that precedes it.
    sentences = [pieces[i] + pieces[i + 1] for i in range(0, len(pieces) - 1, 2)]
    if len(pieces) % 2 == 1 and pieces[-1]:
        # Trailing text with no closing punctuation (skip the empty
        # fragment re.split leaves when the text ends with a mark).
        sentences.append(pieces[-1])

    # Greedily pack sentences into paragraphs of <= maxLength characters,
    # never emitting empty paragraphs (the original could append "" when
    # the very first sentence already exceeded maxLength).
    paragraphs = []
    current_paragraph = ""
    for sentence in sentences:
        if len(current_paragraph) + len(sentence) <= maxLength:
            current_paragraph += sentence
        else:
            if current_paragraph.strip():
                paragraphs.append(current_paragraph.strip())
            current_paragraph = sentence
    if current_paragraph.strip():
        paragraphs.append(current_paragraph.strip())

    metadata = {"source": pdf_name}
    return [Document(page_content=p, metadata=metadata) for p in paragraphs]
|
122 |
+
|
123 |
+
|
124 |
+
def askWithEmbedding(question, chatHistory):
    """Answer a question about the indexed book via retrieval-augmented QA.

    Retrieves relevant paragraphs from the persisted Chroma store using
    MMR search and feeds them to a "stuff" QA-with-sources chain.
    Returns ("", history) to clear the textbox and refresh the chatbot.
    """
    # Blank submission: nothing to answer.
    if not question.strip():
        return "", chatHistory

    store = Chroma(persist_directory=persistDirectory, embedding_function=embedding)
    relevantDocs = store.as_retriever(search_type="mmr").get_relevant_documents(question)

    qaChain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")
    answer = qaChain(
        {"input_documents": relevantDocs, "question": question},
        return_only_outputs=True,
    )['output_text']

    chatHistory.append((question, answer))
    return "", chatHistory
|
138 |
+
|
139 |
+
|
140 |
+
# Three independent chat panels stacked in one Gradio page:
# plain OpenAI chat, the tool-using agent, and embedding-based book QA.
with gr.Blocks() as demo:
    gr.Markdown("""<h1><center>Chat with OpenAI!</center></h1>""")
    # Declaring states
    # NOTE(review): chatWithOpenAIHistory is never wired to anything — the
    # Chatbot component itself is passed as the history. Verify before removal.
    chatWithOpenAIHistory = gr.State([])
    chatWithOpenAIChatBot = gr.Chatbot()
    chatWithOpenAIMessage = gr.Textbox()
    chatWithOpenAISubmit = gr.Button("SEND")
    # chatWithOpenAI(question, history) -> ("", history): clears the box,
    # refreshes the chatbot.
    chatWithOpenAISubmit.click(chatWithOpenAI, inputs=[chatWithOpenAIMessage, chatWithOpenAIChatBot],
                               outputs=[chatWithOpenAIMessage, chatWithOpenAIChatBot])

    gr.Markdown("""<h1><center>Chat with your online-connected bot!</center></h1>""")
    # Declaring states
    chatWithAgentHistory = gr.State([])
    chatbot = gr.Chatbot()
    message = gr.Textbox()
    submit = gr.Button("SEND")
    # chatWithAgent(history, message) -> (history, history, ""): updates
    # both the widget and the State, then clears the textbox.
    submit.click(chatWithAgent, inputs=[chatWithAgentHistory, message],
                 outputs=[chatbot, chatWithAgentHistory, message])

    gr.Markdown("""<h1><center>Ask anything about 《边城》!</center></h1>""")
    # NOTE(review): embeddingChatHistory is likewise unused; the Chatbot
    # component doubles as the history value.
    embeddingChatHistory = gr.State([])
    embeddingChatBot = gr.Chatbot()
    embeddingMessage = gr.Textbox()
    embeddingSubmit = gr.Button("ASK")
    embeddingSubmit.click(askWithEmbedding, inputs=[embeddingMessage, embeddingChatBot],
                          outputs=[embeddingMessage, embeddingChatBot])
|
166 |
+
|
167 |
+
# One-time index build: create and persist the Chroma vector store from the
# book text if no persisted database exists yet.
if not os.path.exists(persistDirectory):
    # BUG FIX: pin the codec — the book is Chinese text, and without
    # encoding= the platform default (e.g. cp1252/cp936) can raise
    # UnicodeDecodeError or silently mis-decode on non-UTF-8 locales.
    with open("./边城.txt", encoding="utf-8") as f:
        state_of_the_union = f.read()
    documents = splitParagraph(state_of_the_union, "边城.txt")
    vectordb = Chroma.from_documents(documents=documents, embedding=embedding,
                                     persist_directory=persistDirectory)
    vectordb.persist()
    # Drop the reference so the store is released before the app serves.
    vectordb = None
|
175 |
+
|
176 |
+
if __name__ == "__main__":
    # debug=True surfaces Python tracebacks in the browser during development.
    demo.launch(debug=True)
|
178 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai==0.27.7
|
2 |
+
langchain==0.0.179
|
3 |
+
gradio==3.30.0
|
4 |
+
python-dotenv
|
5 |
+
wikipedia
|
6 |
+
httpcore==0.15
|
7 |
+
chromadb
|
8 |
+
tiktoken
|
边城.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|