danielcwq committed
Commit • ed7c827
Parent(s): Duplicate from danielcwq/chat-your-data-ChatOpenAI-trial
- .gitattributes +34 -0
- README.md +14 -0
- app.py +103 -0
- cli_app.py +17 -0
- econgeoghistvectorstore.pkl +3 -0
- ingest_data.py +23 -0
- query_data.py +58 -0
- requirements.txt +5 -0
- vectorstore.pkl +3 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,14 @@
+---
+title: Chat Your H2 Humanities
+emoji: 📊
+colorFrom: gray
+colorTo: purple
+sdk: gradio
+sdk_version: 3.17.0
+app_file: app.py
+pinned: false
+license: mit
+duplicated_from: danielcwq/chat-your-data-ChatOpenAI-trial
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,103 @@
+import os
+from typing import Optional, Tuple
+
+import gradio as gr
+import pickle
+from query_data import get_chain
+from threading import Lock
+
+with open("econgeoghistvectorstore.pkl", "rb") as f:
+    vectorstore = pickle.load(f)
+
+
+def set_openai_api_key(api_key: str):
+    """Set the api key and return chain.
+    If no api_key, then None is returned.
+    """
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+        chain = get_chain(vectorstore)
+        os.environ["OPENAI_API_KEY"] = ""
+        return chain
+
+class ChatWrapper:
+
+    def __init__(self):
+        self.lock = Lock()
+    def __call__(
+        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
+    ):
+        """Execute the chat functionality."""
+        self.lock.acquire()
+        try:
+            history = history or []
+            # If chain is None, that is because no API key was provided.
+            if chain is None:
+                history.append((inp, "Please paste your OpenAI key to use"))
+                return history, history
+            # Set OpenAI key
+            import openai
+            openai.api_key = api_key
+            # Run chain and append input.
+            output = chain({"question": inp, "chat_history": history})["answer"]
+            history.append((inp, output))
+        except Exception as e:
+            raise e
+        finally:
+            self.lock.release()
+        return history, history
+
+chat = ChatWrapper()
+
+block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
+
+with block:
+    with gr.Row():
+        gr.Markdown("<h3><center>Chat-Your-H2 Humanities (History, Economics, Geography)</center></h3>")
+
+        openai_api_key_textbox = gr.Textbox(
+            placeholder="Paste your OpenAI API key (sk-...)",
+            show_label=False,
+            lines=1,
+            type="password",
+        )
+
+    chatbot = gr.Chatbot()
+
+    with gr.Row():
+        message = gr.Textbox(
+            label="What's your question?",
+            placeholder="Ask questions about anything covered in the H2 Humanities (History, Economics, Geography) syllabus",
+            lines=1,
+        )
+        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+
+    gr.Examples(
+        examples=[
+            "Explain the differences between physical and chemical weathering in the humid tropics.",
+            "Use the real wealth effect to explain the negative gradient of the AD curve.",
+            "Explain the multiplier process.",
+            "To what extent were the problems of the crisis decades caused by the actions of the US?"
+        ],
+        inputs=message,
+    )
+
+    gr.HTML("Demo application of a LangChain chain, built on H2 Economics, H2 History and H2 Geography Data. Many thanks to Jean Chua for giving her notes for Econs, and Yu Tang for his input on Geog.")
+
+    gr.HTML(
+        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
+    )
+
+    state = gr.State()
+    agent_state = gr.State()
+
+    submit.click(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+    message.submit(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+
+    openai_api_key_textbox.change(
+        set_openai_api_key,
+        inputs=[openai_api_key_textbox],
+        outputs=[agent_state],
+    )
+
+block.launch(debug=True)
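A note on the wiring: `state` carries the chat history and `agent_state` carries the chain that set_openai_api_key builds whenever the key textbox changes, so every Send click (and every Enter in the message box) hands ChatWrapper the key, the message, the history, and a ready-made chain. Running `python app.py` from the repo root starts the Blocks UI; debug=True keeps tracebacks visible in the console.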
cli_app.py
ADDED
@@ -0,0 +1,17 @@
+import pickle
+from query_data import get_chain
+
+
+if __name__ == "__main__":
+    with open("econgeoghistvectorstore.pkl", "rb") as f:
+        vectorstore = pickle.load(f)
+    qa_chain = get_chain(vectorstore)
+    chat_history = []
+    print("Chat with your docs!")
+    while True:
+        print("Human:")
+        question = input()
+        result = qa_chain({"question": question, "chat_history": chat_history})
+        chat_history.append((question, result["answer"]))
+        print("AI:")
+        print(result["answer"])
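Unlike app.py, cli_app.py never receives a key from the user, so OPENAI_API_KEY must already be in the environment before the loop starts. A minimal sketch of supplying it in-process; the "sk-..." value is a placeholder, not a real key:

import os

# Placeholder key for illustration only; export a real key instead.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")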
econgeoghistvectorstore.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aab742b62c3b3405a2d431259fb693ad65cf60a855424f1fb5c28076e554ec6
+size 23618132
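These three lines are a Git LFS pointer, not the data itself: the ~23 MB FAISS pickle is stored in LFS (per the *.pkl rule in .gitattributes above) and is what app.py and cli_app.py unpickle at startup. vectorstore.pkl at the bottom of this commit is the same kind of pointer.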
ingest_data.py
ADDED
@@ -0,0 +1,23 @@
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.vectorstores.faiss import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+import pickle
+
+# Load Data
+loader = UnstructuredFileLoader("state_of_the_union.txt")
+raw_documents = loader.load()
+
+# Split text
+text_splitter = RecursiveCharacterTextSplitter()
+documents = text_splitter.split_documents(raw_documents)
+
+
+# Load Data to vectorstore
+embeddings = OpenAIEmbeddings()
+vectorstore = FAISS.from_documents(documents, embeddings)
+
+
+# Save vectorstore
+with open("vectorstore.pkl", "wb") as f:
+    pickle.dump(vectorstore, f)
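Note that this script is still the template's single-file example: it ingests state_of_the_union.txt and writes vectorstore.pkl, whereas the app loads econgeoghistvectorstore.pkl. The commit includes neither the notes nor the script that produced the combined store; a plausible sketch of how it could be built with the same pipeline, assuming the notes sit in local text files (the filenames below are hypothetical):

import pickle

from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Hypothetical source files; the actual notes are not part of this commit.
raw_documents = []
for path in ["econs_notes.txt", "geog_notes.txt", "hist_notes.txt"]:
    raw_documents.extend(UnstructuredFileLoader(path).load())

# Same split/embed/index pipeline as ingest_data.py, one combined index.
documents = RecursiveCharacterTextSplitter().split_documents(raw_documents)
vectorstore = FAISS.from_documents(documents, OpenAIEmbeddings())

with open("econgeoghistvectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)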
query_data.py
ADDED
@@ -0,0 +1,58 @@
+from langchain.prompts.prompt import PromptTemplate
+from langchain.llms import OpenAI
+from langchain.chains import ChatVectorDBChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    AIMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+
+from langchain.schema import (
+    AIMessage,
+    HumanMessage,
+    SystemMessage
+)
+
+system_template = """Use the following pieces of context to answer the user's question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+----------------
+{context}"""
+
+messages = [
+    SystemMessagePromptTemplate.from_template(system_template),
+    HumanMessagePromptTemplate.from_template("{question}")
+]
+prompt = ChatPromptTemplate.from_messages(messages)
+
+_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+You can assume the question is about the syllabus of the H2 Economics, H2 History and H2 Geography A-Level Examinations in Singapore.
+
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+#template = """You are an AI assistant for answering questions about history, geography or economics for the H2 A-Levels.
+#You are given the following extracted parts of a long document and a question. Provide a conversational answer.
+#If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
+#If the question is not about history, geography or economics, politely inform them that you are tuned to only answer questions about it.
+#Question: {question}
+#=========
+#{context}
+#=========
+#Answer in Markdown:"""
+#QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+prompt = ChatPromptTemplate.from_messages(messages)
+
+def get_chain(vectorstore):
+    llm = ChatOpenAI(temperature=0)
+    qa_chain = ChatVectorDBChain.from_llm(
+        llm,
+        vectorstore,
+        qa_prompt=prompt,
+        condense_question_prompt=CONDENSE_QUESTION_PROMPT
+    )
+    return qa_chain
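get_chain is built on ChatVectorDBChain, which condenses each follow-up into a standalone question with CONDENSE_QUESTION_PROMPT, retrieves context from the vectorstore, and answers with the chat prompt. Later LangChain releases deprecate that class in favour of ConversationalRetrievalChain; an equivalent sketch for those releases (an assumption, since nothing in this commit pins a LangChain version):

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

# Reuse the prompts defined in query_data.py.
from query_data import CONDENSE_QUESTION_PROMPT, prompt


def get_chain_v2(vectorstore):
    # Same condense-then-answer flow, expressed with the newer class.
    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0),
        vectorstore.as_retriever(),
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        combine_docs_chain_kwargs={"prompt": prompt},
    )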
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+langchain
+openai
+unstructured
+faiss-cpu
+gradio
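All five dependencies are unpinned. The README pins the Space's Gradio SDK to 3.17.0, and both `.style(...)` in app.py and ChatVectorDBChain in query_data.py are APIs that later Gradio and LangChain releases removed or deprecated, so reproducing this outside the Space will likely require pinning to versions from around early 2023.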
vectorstore.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dbc3a6b561a37e67ed7afd7827808b219706f3daf4d280b2894e9116c43a994
+size 1896857