Paul-Louis Pröve
committed on
Commit
·
f72cccc
1
Parent(s):
a573f97
initial commit
Browse files- .gitattributes +2 -0
- .gitignore +2 -0
- app.py +102 -0
- pitchbook-excel/index.faiss +3 -0
- pitchbook-excel/index.pkl +3 -0
- requirements.txt +5 -0
- sys_prompt.txt +7 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
pitchbook-excel/index.faiss filter=lfs diff=lfs merge=lfs -text
|
37 |
+
pitchbook-excel/index.pkl filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
.vscode
|
app.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import openai
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
# from sentence_transformers import SentenceTransformer
|
6 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
7 |
+
|
8 |
+
# from langchain.vectorstores.azuresearch import AzureSearch
|
9 |
+
from langchain.vectorstores.faiss import FAISS
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
# Route the legacy `openai` module-level client to an Azure OpenAI resource.
# The key and endpoint are read from the environment (populated by
# load_dotenv() above when a .env file is present).
openai.api_type = "azure"
openai.api_version = "2023-05-15"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")

# System prompt that is prepended to the retrieved documents on every request.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open("sys_prompt.txt", "r", encoding="utf-8") as f:
    sys_prompt = f.read()

# Embedding model used to embed queries for the vector store below.
embedder = OpenAIEmbeddings(engine="text-embedding-ada-002", chunk_size=1)
# embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")

# Vector store loaded from the "pitchbook-excel" directory (index.faiss +
# index.pkl).
# NOTE(review): index.pkl is a pickle file, so loading it executes whatever it
# contains — only ship index files produced by a trusted build step.
db = FAISS.load_local("pitchbook-excel", embedder)
|
26 |
+
# db = AzureSearch(
|
27 |
+
# index_name="pitchbook-excel",
|
28 |
+
# azure_search_endpoint=os.environ.get("AZURE_SEARCH_ENDPOINT"),
|
29 |
+
# azure_search_key=os.environ.get("AZURE_SEARCH_KEY"),
|
30 |
+
# embedding_function=embedding_function,
|
31 |
+
# )
|
32 |
+
|
33 |
+
|
34 |
+
def gpt(history, prompt, temp=0.0):
    """Request a streamed chat completion for the conversation so far.

    Args:
        history: Iterable of ``(user_message, bot_message)`` pairs. A falsy
            bot message (e.g. ``None`` for the turn still awaiting a reply)
            is left out of the request.
        prompt: Text sent as the leading system message.
        temp: Sampling temperature forwarded to the API.

    Returns:
        The result of ``openai.ChatCompletion.create`` called with
        ``stream=True``.
    """
    messages = [{"role": "system", "content": prompt}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if not bot_msg:
            # No reply recorded for this turn, so there is nothing to replay.
            continue
        messages.append({"role": "assistant", "content": bot_msg})

    return openai.ChatCompletion.create(
        engine="gpt-4-32k",
        messages=messages,
        temperature=temp,
        stream=True,
    )
|
46 |
+
|
47 |
+
|
48 |
+
def user(message, history):
    """Append the submitted message to the chat as a turn awaiting a reply.

    Args:
        message: Text the user just submitted.
        history: Current list of ``[user_message, bot_message]`` turns.

    Returns:
        A tuple ``("", new_history)``: the empty string clears the input box,
        and ``new_history`` is a new list equal to ``history`` plus
        ``[message, None]``. The list passed in is not modified.
    """
    pending_turn = [message, None]
    new_history = history + [pending_turn]
    cleared_input = ""
    return cleared_input, new_history
|
51 |
+
|
52 |
+
|
53 |
+
def search(history, results, k=8):
    """Fetch supporting documents for the latest user message, once.

    Args:
        history: List of ``[user_message, bot_message]`` turns; the query is
            the user message of the last turn.
        results: Previously retrieved documents. When truthy they are
            returned unchanged and no search is performed.
        k: Number of documents to request from the vector store.

    Returns:
        A tuple ``(history, results)``, where ``results`` is either the value
        passed in or a list of the ``page_content`` of each hit.
    """
    if not results:
        # Nothing cached yet: query the vector store with the newest message.
        query = history[-1][0]
        hits = db.similarity_search(query, k=k)
        results = [hit.page_content for hit in hits]
    return history, results
|
61 |
+
|
62 |
+
|
63 |
+
def bot(history, results):
    """Stream the model's reply into the last turn of the chat history.

    The system prompt sent to the model is ``sys_prompt`` followed by
    ``str(results)``.

    Args:
        history: List of ``[user_message, bot_message]`` turns. The bot slot
            of the last turn is reset to ``""`` and then extended in place as
            text arrives.
        results: Retrieved documents to append to the system prompt.

    Yields:
        ``history`` (the same list object) after each processed stream chunk.
    """
    stream = gpt(history, sys_prompt + str(results))
    history[-1][1] = ""
    for chunk in stream:
        choices = chunk.get("choices")
        if not choices:
            # A chunk without any choice has no text to add. Azure streams
            # may include such chunks (e.g. carrying only filter metadata);
            # indexing [0] on them would raise IndexError.
            continue
        piece = choices[0].get("delta", {}).get("content")
        if piece:
            # Skip missing/None/empty content (e.g. a role-only delta) so we
            # never attempt str + None.
            history[-1][1] += piece
        yield history
|
70 |
+
|
71 |
+
|
72 |
+
# UI layout: retrieved documents on the left, chat on the right.
# NOTE(review): the nesting below was reconstructed from a view that had lost
# its indentation — confirm the Row/Column structure against the original.
with gr.Blocks(
    css="footer {visibility: hidden} #docs {height: 720px; overflow: auto !important}"
) as app:
    with gr.Row():
        with gr.Column(scale=1):
            # Read-only JSON panel. It is also what carries the retrieved
            # documents between the `search` and `bot` steps.
            text = gr.JSON(None, interactive=False, elem_id="docs")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=582)
            with gr.Row():
                msg = gr.Textbox(show_label=False, scale=7)
                # On submit: record the message, retrieve documents (skipped
                # when the panel already holds some), then stream the reply.
                msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
                    search,
                    [chatbot, text],
                    [chatbot, text],
                    queue=False,
                ).then(bot, [chatbot, text], chatbot)
                # btn = gr.Button("Send", variant="primary")
                # btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
                #     search,
                #     [chatbot, text],
                #     [chatbot, text],
                #     queue=False,
                # ).then(bot, [chatbot, text], chatbot)
            with gr.Row():
                # Clear empties the chat and the documents panel, so the next
                # message triggers a fresh search.
                gr.Button("Clear").click(
                    lambda x, y: ([], None), [chatbot, text], [chatbot, text]
                )
                # Undo drops the most recent turn; the documents are kept.
                gr.Button("Undo").click(lambda x: (x[:-1]), [chatbot], [chatbot])

# Login credentials come from the AUTH_USER / AUTH_PASSWORD environment
# variables.
app.queue().launch(auth=(os.getenv("AUTH_USER"), os.getenv("AUTH_PASSWORD")))
|
pitchbook-excel/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee08f59009ef3c1674462c00549d5f6b8e262173187eb77321cec917352f9519
|
3 |
+
size 2076717
|
pitchbook-excel/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cad16e05c87e3994fcba1a82c67dd9974db20e7d9df857154ba857b714a3db9
|
3 |
+
size 531874
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
langchain
|
3 |
+
python-dotenv
|
4 |
+
azure-identity
|
5 |
+
azure-search-documents==11.4.0b8
|
sys_prompt.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a helpful assistant that answers questions and queries.
|
2 |
+
You respond based on documents that will be provided to you.
|
3 |
+
You only and exclusively use the documents as a source of information.
|
4 |
+
If the documents don't provide the answer or are empty, simply say so.
|
5 |
+
Use only those documents that are strictly relevant to the query.
|
6 |
+
|
7 |
+
Documents:
|