Spaces:
Sleeping
Sleeping
danielcwq
committed on
Commit
•
6772051
0
Parent(s):
Duplicate from danielcwq/chat-your-data-trial
Browse files- .gitattributes +34 -0
- README.md +14 -0
- app.py +102 -0
- cli_app.py +17 -0
- ingest_data.py +23 -0
- query_data.py +34 -0
- requirements.txt +5 -0
- vectorstore.pkl +3 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Chat Your Data H2 Economics
|
3 |
+
emoji: 📊
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: purple
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.17.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
duplicated_from: danielcwq/chat-your-data-trial
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Optional, Tuple
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import pickle
|
6 |
+
from query_data import get_chain
|
7 |
+
from threading import Lock
|
8 |
+
|
9 |
+
# Load the prebuilt vector store once at import time; it is passed to
# get_chain() whenever a chain is built.
# SECURITY: pickle.load executes arbitrary code embedded in the file. Only
# ship a vectorstore.pkl that was produced by a trusted ingest run.
with open("vectorstore.pkl", "rb") as f:
    vectorstore = pickle.load(f)
|
11 |
+
|
12 |
+
|
13 |
+
def set_openai_api_key(api_key: str):
    """Build a chain using the supplied OpenAI API key.

    The key is placed in the ``OPENAI_API_KEY`` environment variable only
    while ``get_chain(vectorstore)`` runs; the variable is blanked again
    afterwards, including when building the chain raises.

    Args:
        api_key: The key text entered by the user; may be empty.

    Returns:
        The chain returned by ``get_chain``, or ``None`` when ``api_key`` is
        empty.
    """
    if not api_key:
        return None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        return get_chain(vectorstore)
    finally:
        # Always blank the variable so one user's key does not linger in the
        # process environment, even if get_chain() fails.
        os.environ["OPENAI_API_KEY"] = ""
|
22 |
+
|
23 |
+
class ChatWrapper:
    """Callable chat handler that runs one request at a time.

    Each call assigns the module-level ``openai.api_key``; a lock is held for
    the whole call so concurrent requests do not interleave.
    """

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
    ):
        """Execute the chat functionality.

        Args:
            api_key: OpenAI API key assigned to ``openai.api_key`` before the
                chain is run.
            inp: The user's message.
            history: Previously accumulated ``(question, answer)`` pairs, or
                ``None``/empty on the first call. Despite the annotation it is
                used as a list: it is appended to in place.
            chain: Callable taking ``{"question", "chat_history"}`` and
                returning a mapping with an ``"answer"`` key, or ``None`` if
                no API key has been provided yet.

        Returns:
            A ``(history, history)`` pair: the same list object twice, once
            for the chatbot display and once for the stored state.

        Raises:
            Whatever ``chain`` raises; the lock is released first.
        """
        # The context manager releases the lock on every exit path (early
        # return, normal completion, or exception).
        with self.lock:
            history = history or []
            # If chain is None, that is because no API key was provided.
            if chain is None:
                history.append((inp, "Please paste your OpenAI key to use"))
                return history, history

            # Set OpenAI key
            import openai

            openai.api_key = api_key

            # Run chain and append input.
            output = chain({"question": inp, "chat_history": history})["answer"]
            history.append((inp, output))
        return history, history
|
49 |
+
|
50 |
+
# Single shared handler instance used by both the button and the textbox.
chat = ChatWrapper()

# Page layout. Components appear in the order they are created below.
with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
    # Header row: title plus the API-key field.
    with gr.Row():
        gr.Markdown("<h3><center>Chat-Your-Data (H2 Economics)</center></h3>")

        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key (sk-...)",
            show_label=False,
            lines=1,
            type="password",
        )

    chatbot = gr.Chatbot()

    # Input row: question box and send button.
    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="Ask questions about anything covered in the H2 Economics syllabus",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    # Clickable sample questions that fill the question box.
    gr.Examples(
        examples=[
            "Explain real wealth effect.",
            "Use the real wealth effect to explain the negative gradient of the AD curve.",
            "Explain the multiplier process.",
        ],
        inputs=message,
    )

    gr.HTML("Demo application of a LangChain chain, built on H2 Economics Data. Many thanks to Jean Chua for giving her notes for this project.")

    gr.HTML(
        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
    )

    # Per-session values: the chat history and the chain built from the key.
    state = gr.State()
    agent_state = gr.State()

    # Both "Send" and pressing Enter in the textbox trigger the same handler
    # with the same wiring.
    chat_inputs = [openai_api_key_textbox, message, state, agent_state]
    chat_outputs = [chatbot, state]
    submit.click(chat, inputs=chat_inputs, outputs=chat_outputs)
    message.submit(chat, inputs=chat_inputs, outputs=chat_outputs)

    # Rebuild the chain whenever the key field changes.
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox],
        outputs=[agent_state],
    )

block.launch(debug=True)
|
cli_app.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
from query_data import get_chain
|
3 |
+
|
4 |
+
|
5 |
+
def main():
    """Run an interactive question/answer loop on the terminal.

    Loads the pickled vector store, builds the chain with ``get_chain`` and
    then alternates between reading a question from stdin and printing the
    chain's answer, carrying the running chat history into each call. The
    loop ends when stdin is exhausted or the user presses Ctrl-C at the
    prompt.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # load a vectorstore.pkl from a trusted ingest run.
    with open("vectorstore.pkl", "rb") as f:
        vectorstore = pickle.load(f)
    qa_chain = get_chain(vectorstore)
    chat_history = []
    print("Chat with your docs!")
    while True:
        print("Human:")
        try:
            question = input()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: exit quietly instead of
            # dumping a traceback.
            print()
            break
        result = qa_chain({"question": question, "chat_history": chat_history})
        chat_history.append((question, result["answer"]))
        print("AI:")
        print(result["answer"])


if __name__ == "__main__":
    main()
|
ingest_data.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
+
from langchain.document_loaders import UnstructuredFileLoader
|
3 |
+
from langchain.vectorstores.faiss import FAISS
|
4 |
+
from langchain.embeddings import OpenAIEmbeddings
|
5 |
+
import pickle
|
6 |
+
|
7 |
+
# Step 1: read the raw source document from disk.
# NOTE(review): the input is still "state_of_the_union.txt" although the app
# is presented as H2 Economics; confirm this is the intended source file.
loader = UnstructuredFileLoader("state_of_the_union.txt")
raw_documents = loader.load()

# Step 2: split the loaded document into chunks using the splitter's defaults.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

# Step 3: embed the chunks with OpenAI embeddings and index them in FAISS.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)

# Step 4: persist the index; app.py and cli_app.py read this file back.
with open("vectorstore.pkl", "wb") as f:
    pickle.dump(vectorstore, f)
|
query_data.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.prompts.prompt import PromptTemplate
|
2 |
+
from langchain.llms import OpenAI
|
3 |
+
from langchain.chains import ChatVectorDBChain
|
4 |
+
|
5 |
+
# Prompt that turns a follow-up question plus the chat history into a single
# standalone question. Placeholders: {chat_history}, {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the question is about the syllabus of the H2 Economics A-Level Examination in Singapore.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
|
13 |
+
|
14 |
+
# Prompt used to answer the question from the retrieved document excerpts.
# Placeholders: {question} and {context}.
template = """You are an AI assistant for answering questions about economics for the H2 Economics A-Levels.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about H2 Economics, politely inform them that you are tuned to only answer questions about it.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
QA_PROMPT = PromptTemplate(
    input_variables=["question", "context"],
    template=template,
)
|
24 |
+
|
25 |
+
|
26 |
+
def get_chain(vectorstore):
    """Create the chat chain that answers questions against ``vectorstore``.

    An ``OpenAI`` LLM with ``temperature=0`` is combined with the given vector
    store through ``ChatVectorDBChain.from_llm``, using the module's
    ``CONDENSE_QUESTION_PROMPT`` and ``QA_PROMPT``.

    Args:
        vectorstore: The vector store handed to the chain.

    Returns:
        The chain object produced by ``ChatVectorDBChain.from_llm``.
    """
    return ChatVectorDBChain.from_llm(
        OpenAI(temperature=0),
        vectorstore,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        qa_prompt=QA_PROMPT,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
openai
|
3 |
+
unstructured
|
4 |
+
faiss-cpu
|
5 |
+
gradio
|
vectorstore.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dbc3a6b561a37e67ed7afd7827808b219706f3daf4d280b2894e9116c43a994
|
3 |
+
size 1896857
|