Paul-Louis Pröve committed on
Commit
f72cccc
·
1 Parent(s): a573f97

initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pitchbook-excel/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ pitchbook-excel/index.pkl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ .vscode
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ import gradio as gr
4
+
5
+ # from sentence_transformers import SentenceTransformer
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+
8
+ # from langchain.vectorstores.azuresearch import AzureSearch
9
+ from langchain.vectorstores.faiss import FAISS
10
+ from dotenv import load_dotenv
11
+
12
# Load variables from a local .env file into the process environment so the
# os.getenv() calls in this module can see them.
load_dotenv()

# Point the openai client at an Azure OpenAI resource; the key and endpoint
# are taken from the environment rather than hard-coded.
openai.api_type = "azure"
openai.api_version = "2023-05-15"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")

# System prompt prefix. The retrieved documents are appended to this text when
# a completion is requested. The encoding is explicit so the result does not
# depend on the platform's default locale.
with open("sys_prompt.txt", "r", encoding="utf-8") as f:
    sys_prompt = f.read()

# Embedding model used to embed queries against the index below.
# NOTE(review): chunk_size=1 presumably works around a one-input-per-request
# limit of the Azure embeddings endpoint - confirm before changing.
embedder = OpenAIEmbeddings(engine="text-embedding-ada-002", chunk_size=1)
# embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")

# Vector index shipped with the repository in the "pitchbook-excel" folder.
# NOTE(review): the folder contains an index.pkl file, which is presumably
# unpickled on load - only ever load index files from a trusted source.
db = FAISS.load_local("pitchbook-excel", embedder)
# db = AzureSearch(
#     index_name="pitchbook-excel",
#     azure_search_endpoint=os.environ.get("AZURE_SEARCH_ENDPOINT"),
#     azure_search_key=os.environ.get("AZURE_SEARCH_KEY"),
#     embedding_function=embedding_function,
# )
32
+
33
+
34
def gpt(history, prompt, temp=0.0):
    """Request a streamed chat completion for the conversation so far.

    Args:
        history: Iterable of ``(user_message, bot_message)`` pairs. A pair
            whose bot message is falsy (e.g. ``None`` for the turn still being
            answered) contributes only its user message.
        prompt: Text sent as the system message.
        temp: Sampling temperature forwarded to the API.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns for ``stream=True``;
        the caller iterates over it to receive the response chunks.
    """
    messages = [{"role": "system", "content": prompt}]
    for question, answer in history:
        messages.append({"role": "user", "content": question})
        if answer:
            messages.append({"role": "assistant", "content": answer})

    return openai.ChatCompletion.create(
        engine="gpt-4-32k",
        messages=messages,
        temperature=temp,
        stream=True,
    )
46
+
47
+
48
def user(message, history):
    """Append the submitted message to the chat as a not-yet-answered turn.

    Args:
        message: Text the user submitted.
        history: Current list of ``[user_message, bot_message]`` pairs.
            ``None`` is treated as an empty conversation.

    Returns:
        A tuple ``("", new_history)``. The empty string clears the input box;
        ``new_history`` is a new list ending in ``[message, None]`` so the
        message is displayed while the answer is still pending. The list
        passed in is not mutated.
    """
    # Necessary to clear input and display message
    return "", (history or []) + [[message, None]]
51
+
52
+
53
def search(history, results, k=8):
    """Retrieve documents for the latest user message, once per conversation.

    Args:
        history: List of ``[user_message, bot_message]`` pairs; the user
            message of the last pair is used as the query.
        results: Documents retrieved earlier in this conversation, if any.
        k: Number of documents to request from the vector index.

    Returns:
        A tuple ``(history, results)``. ``history`` is passed through
        unchanged. ``results`` is the list of ``page_content`` strings of the
        hits, or the value passed in when no search was performed.
    """
    if results:
        # If results already exist, don't search again
        return history, results

    # Nothing to look up: no turn yet, or the latest message is blank.
    # Skipping avoids an IndexError and a pointless embedding request.
    if not history or not (history[-1][0] or "").strip():
        return history, results

    hits = db.similarity_search(history[-1][0], k=k)
    return history, [hit.page_content for hit in hits]
61
+
62
+
63
def bot(history, results):
    """Stream the model's answer into the last turn of the chat history.

    The system prompt sent to the model is ``sys_prompt`` followed by the
    string form of ``results``.

    Args:
        history: List of ``[user_message, bot_message]`` pairs. The last
            pair's bot slot is reset to ``""`` and then extended in place.
        results: Retrieved documents to append to the system prompt.

    Yields:
        ``history`` after each piece of text has been appended, so the caller
        can re-render the partially completed answer.
    """
    stream = gpt(history, sys_prompt + str(results))
    history[-1][1] = ""
    for chunk in stream:
        choices = chunk["choices"]
        if not choices:
            # Some stream chunks carry no choices at all; there is no text
            # in them, and indexing [0] would raise IndexError.
            continue
        # A delta may lack "content" or carry None (e.g. a role-only delta);
        # only real text is appended.
        piece = choices[0]["delta"].get("content")
        if piece:
            history[-1][1] += piece
            yield history
70
+
71
+
72
# Two-column layout: retrieved documents on the left, chat on the right.
# NOTE(review): the nesting below was reconstructed from a flattened view of
# this file - confirm the layout against the running app.
with gr.Blocks(
    css="footer {visibility: hidden} #docs {height: 720px; overflow: auto !important}"
) as app:
    with gr.Row():
        with gr.Column(scale=1):
            # Holds the retrieved documents; it is both input and output of
            # search(), so it doubles as per-conversation state.
            text = gr.JSON(None, interactive=False, elem_id="docs")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=582)
            with gr.Row():
                msg = gr.Textbox(show_label=False, scale=7)
                # On submit: show the message, retrieve documents, then
                # stream the answer into the chat.
                msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
                    search,
                    [chatbot, text],
                    [chatbot, text],
                    queue=False,
                ).then(bot, [chatbot, text], chatbot)
                # btn = gr.Button("Send", variant="primary")
                # btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
                #     search,
                #     [chatbot, text],
                #     [chatbot, text],
                #     queue=False,
                # ).then(bot, [chatbot, text], chatbot)
            with gr.Row():
                # Clear resets both the chat and the retrieved documents, so
                # the next question triggers a fresh search.
                gr.Button("Clear").click(
                    lambda x, y: ([], None), [chatbot, text], [chatbot, text]
                )
                # Undo drops the most recent turn from the chat.
                gr.Button("Undo").click(lambda x: (x[:-1]), [chatbot], [chatbot])

# Queueing is enabled before launch; bot() is a generator and is registered
# without queue=False. Login credentials are read from the environment.
app.queue().launch(auth=(os.getenv("AUTH_USER"), os.getenv("AUTH_PASSWORD")))
pitchbook-excel/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee08f59009ef3c1674462c00549d5f6b8e262173187eb77321cec917352f9519
3
+ size 2076717
pitchbook-excel/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cad16e05c87e3994fcba1a82c67dd9974db20e7d9df857154ba857b714a3db9
3
+ size 531874
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ langchain
3
+ python-dotenv
4
+ azure-identity
5
+ azure-search-documents==11.4.0b8
sys_prompt.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ You are a helpful assistant that answers questions and queries.
2
+ You respond based on documents that will be provided to you.
3
+ You only and exclusively use the documents as a source of information.
4
+ If the documents don't provide the answer or are empty, simply say so.
5
+ Use only those documents that are strictly relevant to the query.
6
+
7
+ Documents: