Matt Robinson committed on
Commit
01095ae
1 Parent(s): fe39603

chat isw app files

Browse files
Files changed (8) hide show
  1. LICENSE +21 -0
  2. README.md +17 -13
  3. app.py +103 -0
  4. cli_app.py +17 -0
  5. ingest_data.py +30 -0
  6. query_data.py +34 -0
  7. requirements.txt +6 -0
  8. vectorstore.pkl +0 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Harrison Chase
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,17 @@
1
- ---
2
- title: Chat Your Data Isw
3
- emoji: 📚
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 3.18.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
+ # Chat-Your-Data
2
+
3
+ Create a ChatGPT-like experience over your custom docs using [LangChain](https://github.com/hwchase17/langchain).
4
+
5
+ See [this blog post](https://blog.langchain.dev/tutorial-chatgpt-over-your-data/) for a more detailed explanation.
6
+
7
+ ## Ingest data
8
+
9
+ Ingestion of data is done over the `state_of_the_union.txt` file.
10
+ Therefore, the only thing needed to ingest data is to run `python ingest_data.py`.
11
+
12
+ ## Query data
13
+ Custom prompts are used to ground the answers in the state of the union text file.
14
+
15
+ ## Running the Application
16
+
17
+ By running `python app.py` from the command line you can easily interact with your ChatGPT over your own data.
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional, Tuple
3
+
4
+ import gradio as gr
5
+ import pickle
6
+ from query_data import get_chain
7
+ from threading import Lock
8
+
9
# Load the pre-built vector store from disk once, at import time.
# NOTE(review): unpickling runs arbitrary code embedded in the file, so only
# use a vectorstore.pkl that was generated from a trusted source.
with open("vectorstore.pkl", mode="rb") as f:
    vectorstore = pickle.load(f)
11
+
12
+
13
def set_openai_api_key(api_key: str):
    """Build a chain using the given OpenAI API key.

    The key is written to the ``OPENAI_API_KEY`` environment variable only
    for the duration of the ``get_chain`` call and is blanked afterwards
    (presumably so one user's key does not linger in the shared process
    environment).

    Args:
        api_key: The OpenAI API key entered in the UI; may be empty.

    Returns:
        The chain returned by ``get_chain(vectorstore)``, or ``None`` when
        no ``api_key`` was provided.
    """
    if not api_key:
        return None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        return get_chain(vectorstore)
    finally:
        # Blank the key even if chain construction raises; without the
        # ``finally`` a failure would leave the key in the environment.
        os.environ["OPENAI_API_KEY"] = ""
22
+
23
class ChatWrapper:
    """Callable that executes chat turns one at a time, guarded by a lock."""

    def __init__(self):
        # Held for the whole of __call__ so that calls do not overlap.
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
    ):
        """Execute the chat functionality.

        Args:
            api_key: OpenAI API key; assigned to ``openai.api_key`` before
                the chain is run.
            inp: The user's message for this turn.
            history: Previous ``(input, output)`` pairs, or ``None`` / empty
                when the conversation is just starting. A non-empty list is
                appended to in place.
            chain: Callable invoked with a dict holding ``"question"`` and
                ``"chat_history"``; its result must have an ``"answer"``
                key. ``None`` means no API key has been provided yet.

        Returns:
            A ``(history, history)`` tuple: the same updated list twice,
            matching the two outputs the UI wires this callable to.

        Raises:
            Exception: Anything raised by the chain propagates unchanged;
                the lock is released either way.
        """
        # The context manager releases the lock on every exit path.
        with self.lock:
            history = history or []
            # If chain is None, that is because no API key was provided.
            if chain is None:
                history.append((inp, "Please paste your OpenAI key to use"))
                return history, history

            # Imported lazily so the module only needs openai once a chain
            # is actually run.
            import openai

            openai.api_key = api_key
            # Run chain and record this turn.
            output = chain({"question": inp, "chat_history": history})["answer"]
            history.append((inp, output))
            return history, history
49
+
50
# One shared callable handles every chat request.
chat = ChatWrapper()

block = gr.Blocks(css=".gradio-container {background-color: lightgray}")

with block:
    # Header row: page title and the API-key field.
    with gr.Row():
        gr.Markdown("<h3><center>Chat-Your-Data (ISW Updates)</center></h3>")

        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key (sk-...)",
            show_label=False,
            lines=1,
            type="password",
        )

    # Conversation display.
    chatbot = gr.Chatbot()

    # Input row: question box and send button.
    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="Ask questions about the war in Ukraine",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

    # Example questions that fill the message box.
    gr.Examples(
        examples=[
            "What is the focus of the Russian offensive?",
            "Where are the frontlines?",
            "How are they consolidating power?",
        ],
        inputs=message,
    )

    gr.HTML("Demo application of a LangChain chain.")

    gr.HTML("""<center>
        Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
        and <a href='https://github.com/unstructured-io/unstructured'>Unstructured.IO</a>
        </center>""")

    # Per-session state: the chat history and the chain built from the key.
    state = gr.State()
    agent_state = gr.State()

    # The button click and pressing Enter in the message box run the same
    # handler with the same wiring.
    chat_inputs = [openai_api_key_textbox, message, state, agent_state]
    chat_outputs = [chatbot, state]
    for register in (submit.click, message.submit):
        register(chat, inputs=chat_inputs, outputs=chat_outputs)

    # Rebuild the chain whenever the API key field changes.
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox],
        outputs=[agent_state],
    )

block.launch(debug=True)
cli_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from query_data import get_chain
3
+
4
+
5
def main():
    """Run an interactive question/answer loop on the terminal.

    Loads the pickled vector store from ``vectorstore.pkl``, builds the chain
    with ``get_chain`` and then repeatedly reads a question from stdin,
    prints the chain's answer and records the exchange in the chat history
    that is passed to the next turn. The loop ends when input is exhausted
    (EOF) or the user presses Ctrl-C at the prompt.
    """
    # NOTE(review): unpickling runs arbitrary code embedded in the file, so
    # only use a vectorstore.pkl generated from a trusted source.
    with open("vectorstore.pkl", "rb") as f:
        vectorstore = pickle.load(f)
    qa_chain = get_chain(vectorstore)
    chat_history = []
    print("Chat with your docs!")
    while True:
        print("Human:")
        try:
            question = input()
        except (EOFError, KeyboardInterrupt):
            # Leave the loop cleanly instead of dying with a traceback.
            print()
            break
        result = qa_chain({"question": question, "chat_history": chat_history})
        chat_history.append((question, result["answer"]))
        print("AI:")
        print(result["answer"])


if __name__ == "__main__":
    main()
ingest_data.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain.document_loaders import UnstructuredURLLoader
3
+ from langchain.vectorstores.faiss import FAISS
4
+ from langchain.embeddings import OpenAIEmbeddings
5
+ import pickle
6
+
7
# Pages to ingest.
urls = [
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-4-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-6-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-7-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023",
    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023",
]

# Step 1: fetch the pages as raw documents.
raw_documents = UnstructuredURLLoader(urls=urls).load()

# Step 2: split them into chunks using the splitter's default settings.
documents = RecursiveCharacterTextSplitter().split_documents(raw_documents)

# Step 3: embed the chunks and index them in FAISS.
vectorstore = FAISS.from_documents(documents, OpenAIEmbeddings())

# Step 4: persist the index for the apps that read vectorstore.pkl.
with open("vectorstore.pkl", mode="wb") as f:
    pickle.dump(vectorstore, f)
query_data.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts.prompt import PromptTemplate
2
+ from langchain.llms import OpenAI
3
+ from langchain.chains import ChatVectorDBChain
4
+
5
# Prompt that rewrites a follow-up question, given the chat history, into a
# standalone question. Placeholders: {chat_history}, {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the question is about the war in Ukraine.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt that answers the question from the extracted document parts.
# Placeholders: {question}, {context}.
template = """You are an AI assistant for answering questions about the war in Ukraine.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about the war in Ukraine, politely inform them that you are tuned to only answer questions about the war in Ukraine.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
24
+
25
+
26
def get_chain(vectorstore):
    """Return a ``ChatVectorDBChain`` that answers questions over ``vectorstore``.

    The chain is created from an ``OpenAI`` LLM configured with
    ``temperature=0`` and uses this module's ``QA_PROMPT`` and
    ``CONDENSE_QUESTION_PROMPT``.
    """
    return ChatVectorDBChain.from_llm(
        OpenAI(temperature=0),
        vectorstore,
        qa_prompt=QA_PROMPT,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ beautifulsoup4
2
+ langchain
3
+ openai
4
+ unstructured>=0.4.7
5
+ faiss-cpu
6
+ gradio
vectorstore.pkl ADDED
Binary file (499 kB). View file