Spaces:
Sleeping
Sleeping
first commit
Browse files- .gitattributes +3 -0
- app.py +100 -0
- data/chroma-collections.parquet +3 -0
- data/chroma-embeddings.parquet +3 -0
- data/index/id_to_uuid_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl +3 -0
- data/index/id_to_uuid_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl +3 -0
- data/index/index_569e1be0-57b0-4ef4-abcc-a40cccd1b383.bin +3 -0
- data/index/index_68425e73-3b8d-47fd-9dd0-ceb0ad113836.bin +3 -0
- data/index/index_metadata_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl +3 -0
- data/index/index_metadata_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl +3 -0
- data/index/uuid_to_id_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl +3 -0
- data/index/uuid_to_id_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
data/chroma-collections.parquet filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/index/index_68425e73-3b8d-47fd-9dd0-ceb0ad113836.bin filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/index/index_569e1be0-57b0-4ef4-abcc-a40cccd1b383.bin filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
5 |
+
from langchain.vectorstores import Chroma
|
6 |
+
from langchain.document_loaders import TextLoader
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
|
9 |
+
from langchain.chains import RetrievalQA
|
10 |
+
from langchain.llms import OpenAI
|
11 |
+
|
12 |
+
|
13 |
+
# Streamlit front end for question answering over a persisted Chroma index.
#
# Flow: read the OpenAI key and model settings from the sidebar, open the
# Chroma store under ./data, run the user's prompt through a RetrievalQA
# chain, then list the three closest stored chunks as sources.

st.set_page_config(page_title="CoreMind AI", layout="wide")

st.header("CoreMind AI")

st.sidebar.title("Options")

# The key may be typed into the sidebar or supplied through the
# OPENAI_API_KEY environment variable; a sidebar value takes precedence.
openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")

# Keep the key in a local variable and hand it to the clients explicitly.
# os.environ is shared by every session this process serves, so writing a
# visitor's key there would expose it to other users of the app.
api_key = openai_key or os.environ.get("OPENAI_API_KEY")

qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.5, step=0.01, key="temperature")
qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")

# Without a key the OpenAI clients below cannot be constructed, so ask for
# one and end this script run cleanly instead of surfacing a traceback.
if not api_key:
    st.info("Enter your OpenAI API key in the sidebar to get started.")
    st.stop()

# set up data

embeddings = OpenAIEmbeddings(openai_api_key=api_key)

docsearch = Chroma(persist_directory="data", embedding_function=embeddings)

# st.sidebar.markdown("----")

# allow the user to insert new data
# new_data = st.sidebar.text_area("Add new data", key="new_data")
# if new_data:
#     # save data to user_data
#     with open("user_data/raw_data.txt", "a") as f:
#         f.write(new_data)
#
#     loader = TextLoader("user_data/raw_data.txt")
#     documents = loader.load()
#     text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
#     texts = text_splitter.split_documents(documents)
#     docsearch.add_documents(texts)
#     docsearch.persist()

# st.sidebar.markdown("----")
# choice = st.sidebar.selectbox("Example prompts", ["Compare the revenues and operating profits between 1996, 1995 and 1994", "Get me the common stock investments values and organise them in a format easy for plotting"])
# button = st.sidebar.button("Run Example Prompt")

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=qa_temperature, model_name=qa_model, openai_api_key=api_key),
    retriever=docsearch.as_retriever(),
    # reduce_k_below_max_tokens=True
)

# if button:
#     prompt = st.text_input("Enter your prompt", value=choice, key="prompt")
# else:

prompt = st.text_input("Enter your prompt", value="Imagine you are an analyst looking into Berkshire Hathaway's relationship with GEICO, write a 200 word summary on this aimed at impressing your boss.", key="prompt")

if prompt:
    response = qa.run(prompt)
    st.write(response)
    # plot_this = st.button("Give me code to plot this")

    # if plot_this:
    #     followup_llm = OpenAI(temperature=0, model_name="gpt-3.5-turbo")
    #     plot_code = followup_llm(f"create python code to plot this: {response}")
    #     st.code(plot_code)

    # put the sources in an expander
    # Kept inside the prompt check so that an empty prompt never triggers
    # an embedding request.
    with st.expander("Sources"):
        # create a table of the top 3 most similar documents
        similarity_table = docsearch.similarity_search_with_score(prompt, k=3)
        st.write(similarity_table)

# in an expander, show the documents as a UMAP
# with st.expander("Document Embeddings"):
#     df = pd.read_parquet("/Users/hamish/Workspace/delphicai/data/chroma-embeddings.parquet")
#     umap_mapper = umap.UMAP(n_neighbors=5, min_dist=0.1).fit(df.embedding.tolist())
#     p = umap.plot.interactive(umap_mapper, point_size=5)
#
#     # show the plot in streamlit
#     st.bokeh_chart(p, use_container_width=True)
|
data/chroma-collections.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:187e9ab81a8ddc83a4197346733cf576aef5641b17a101e7cada20f766b063b0
|
3 |
+
size 557
|
data/chroma-embeddings.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b6274fd8c7d2b130d78f70fff66c190823083932ca0c189901ecb04a238e39
|
3 |
+
size 5468138
|
data/index/id_to_uuid_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad11d5913073c6bd04169cd9b66d8ef210bdefda127b1a285f011244f9955b82
|
3 |
+
size 6077
|
data/index/id_to_uuid_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d57feb0308972a3ab352191d913af01f63a834a44586d873e64e82ae9c78652a
|
3 |
+
size 14031
|
data/index/index_569e1be0-57b0-4ef4-abcc-a40cccd1b383.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8108b2b89235fcc744b9dada9ca7bd8e4b6268b8fba0c4c6de7c93c6b04779f1
|
3 |
+
size 1208276
|
data/index/index_68425e73-3b8d-47fd-9dd0-ceb0ad113836.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:748f63e3c85ff8a065f50516ae0e9e7e0e5681f8b2927773e1234d450c81084f
|
3 |
+
size 2756144
|
data/index/index_metadata_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ada77f186fa63ea591d1166c8b2d5ba8b9883d0b5cd527e5d5b1f4a3ed64808
|
3 |
+
size 73
|
data/index/index_metadata_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:571d06c80abef5303ad067a2522a3e22982d39f33b1b03a0edc00e820fcf6170
|
3 |
+
size 74
|
data/index/uuid_to_id_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc5b7aeb57bbd35985867677f88b1dc7af6e65386786a73d065dc2f117bf9e21
|
3 |
+
size 7120
|
data/index/uuid_to_id_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec75e2300d30997d654cb028c6f47bccf2e809ddd3dc703386ba9c0bf2142d07
|
3 |
+
size 16405
|