Spaces:

tensora
/

pb-docs

Sleeping

App Files Community

Paul-Louis Pröve committed on Oct 12, 2023

Commit

f72cccc

1 Parent(s): a573f97

initial commit

Browse files

Files changed (7) hide show

.gitattributes +2 -0
.gitignore +2 -0
app.py +102 -0
pitchbook-excel/index.faiss +3 -0
pitchbook-excel/index.pkl +3 -0
requirements.txt +5 -0
sys_prompt.txt +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+pitchbook-excel/index.faiss filter=lfs diff=lfs merge=lfs -text
+pitchbook-excel/index.pkl filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+ .vscode

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+import openai
+import gradio as gr
+# from sentence_transformers import SentenceTransformer
+from langchain.embeddings.openai import OpenAIEmbeddings
+# from langchain.vectorstores.azuresearch import AzureSearch
+from langchain.vectorstores.faiss import FAISS
+from dotenv import load_dotenv
+# Load variables from a local .env file (if one exists) into the process environment.
+load_dotenv()
+# Point the module-level openai client at an Azure OpenAI resource.
+openai.api_type = "azure"
+openai.api_version = "2023-05-15"
+openai.api_key = os.getenv("OPENAI_API_KEY")
+openai.api_base = os.getenv("OPENAI_API_BASE")
+# System prompt text; bot() appends str(results) directly after it.
+# Decode explicitly as UTF-8 so the prompt does not depend on the host locale.
+with open("sys_prompt.txt", "r", encoding="utf-8") as f:
+    sys_prompt = f.read()
+# chunk_size=1 sends one text per embedding request
+# (presumably an Azure deployment limit -- TODO confirm).
+embedder = OpenAIEmbeddings(engine="text-embedding-ada-002", chunk_size=1)
+# Alternative local embedder, currently disabled:
+# embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")
+# NOTE(review): the "pitchbook-excel" folder holds index.faiss plus index.pkl, so
+# loading it unpickles data. Only ship index files that come from a trusted source.
+db = FAISS.load_local("pitchbook-excel", embedder)
+# Alternative Azure Cognitive Search backend, currently disabled:
+# db = AzureSearch(
+#     index_name="pitchbook-excel",
+#     azure_search_endpoint=os.environ.get("AZURE_SEARCH_ENDPOINT"),
+#     azure_search_key=os.environ.get("AZURE_SEARCH_KEY"),
+#     embedding_function=embedding_function,
+# )
+def gpt(history, prompt, temp=0.0):
+    """Start a streaming chat completion for the conversation so far.
+
+    history: iterable of (user_message, bot_message) pairs. A pair whose
+        bot_message is falsy (e.g. the pending turn) contributes only its
+        user message.
+    prompt: text sent as the system message, ahead of the conversation.
+    temp: sampling temperature passed through to the API.
+
+    Returns whatever openai.ChatCompletion.create returns for stream=True.
+    """
+    messages = [{"role": "system", "content": prompt}]
+    for question, answer in history:
+        messages.append({"role": "user", "content": question})
+        if not answer:
+            continue
+        messages.append({"role": "assistant", "content": answer})
+    return openai.ChatCompletion.create(
+        engine="gpt-4-32k", messages=messages, temperature=temp, stream=True
+    )
+def user(message, history):
+    """Queue the submitted message as a new, not-yet-answered chat turn.
+
+    Returns a pair: an empty string (used to clear the input box) and a new
+    history list with [message, None] appended. The list passed in is not
+    modified.
+    """
+    pending_turn = [message, None]
+    cleared_input = ""
+    return cleared_input, history + [pending_turn]
+def search(history, results, k=8):
+    """Fetch supporting passages for the latest user message, at most once.
+
+    If `results` is already truthy it is returned unchanged and no search is
+    run. Otherwise the text of the last turn's user message is used to query
+    the vector store for the `k` most similar entries, and their
+    page_content values become the new results.
+
+    Returns (history, results), with history passed through untouched.
+    """
+    if not results:
+        query = history[-1][0]
+        hits = db.similarity_search(query, k=k)
+        results = [hit.page_content for hit in hits]
+    return history, results
+def bot(history, results):
+    """Stream the assistant's reply into the last chat turn.
+
+    Calls gpt() with the system prompt followed by str(results), resets the
+    last turn's reply to "", then appends each piece of streamed content to
+    it. This is a generator: it yields the (mutated) history after every
+    piece so the caller can refresh the display incrementally.
+
+    history: list of [user_message, bot_message] turns; the last one is
+        filled in place.
+    results: retrieved passages to ground the answer on.
+    """
+    stream = gpt(history, sys_prompt + str(results))
+    history[-1][1] = ""
+    for chunk in stream:
+        choices = chunk["choices"]
+        if not choices:
+            # Defensive: a chunk with no choices carries no text; skip it
+            # rather than fail with IndexError on choices[0].
+            continue
+        piece = choices[0]["delta"].get("content")
+        if piece is None:
+            # Deltas without content (e.g. the role-only first delta) or with
+            # a null content add nothing to the reply.
+            continue
+        history[-1][1] += piece
+        yield history
+# Two-column layout: retrieved passages on the left, chat on the right.
+with gr.Blocks(
+    css="footer {visibility: hidden} #docs {height: 720px; overflow: auto !important}"
+) as app:
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Shows the retrieved passages; its value is also what search()
+            # and bot() receive as their `results` argument.
+            text = gr.JSON(None, interactive=False, elem_id="docs")
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(height=582)
+            with gr.Row():
+                msg = gr.Textbox(show_label=False, scale=7)
+                # On submit: user() clears the box and adds the pending turn,
+                # search() fills `text` if it is still empty, then bot()
+                # streams the answer into the chat.
+                msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                    search,
+                    [chatbot, text],
+                    [chatbot, text],
+                    queue=False,
+                ).then(bot, [chatbot, text], chatbot)
+                # Optional "Send" button running the same chain, currently disabled:
+                # btn = gr.Button("Send", variant="primary")
+                # btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                #     search,
+                #     [chatbot, text],
+                #     [chatbot, text],
+                #     queue=False,
+                # ).then(bot, [chatbot, text], chatbot)
+            with gr.Row():
+                # Clear empties the chat and resets the passages to None, so
+                # the next question triggers a fresh search.
+                gr.Button("Clear").click(
+                    lambda x, y: ([], None), [chatbot, text], [chatbot, text]
+                )
+                # Undo drops the most recent turn; the passages are left as they are.
+                gr.Button("Undo").click(lambda x: (x[:-1]), [chatbot], [chatbot])
+    # Login credentials are read from the AUTH_USER / AUTH_PASSWORD environment variables.
+    app.queue().launch(auth=(os.getenv("AUTH_USER"), os.getenv("AUTH_PASSWORD")))

pitchbook-excel/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee08f59009ef3c1674462c00549d5f6b8e262173187eb77321cec917352f9519
+size 2076717

pitchbook-excel/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8cad16e05c87e3994fcba1a82c67dd9974db20e7d9df857154ba857b714a3db9
+size 531874

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openai
+langchain
+python-dotenv
+azure-identity
+azure-search-documents==11.4.0b8

sys_prompt.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+You are a helpful assistant that answers questions and queries.
+You respond based on documents that will be provided to you.
+You only and exclusively use the documents as a source of information.
+If the documents don't provide the answer or are empty, simply say so.
+Use only those documents that are strictly relevant to the query.
+Documents: