Hamish committed on
Commit
74347a6
1 Parent(s): 43e05e7

first commit

Browse files
.gitattributes CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ data/chroma-collections.parquet filter=lfs diff=lfs merge=lfs -text
36
+ data/index/index_68425e73-3b8d-47fd-9dd0-ceb0ad113836.bin filter=lfs diff=lfs merge=lfs -text
37
+ data/index/index_569e1be0-57b0-4ef4-abcc-a40cccd1b383.bin filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import streamlit as st
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.document_loaders import TextLoader
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.llms import OpenAI
11
+
12
+
13
+ st.set_page_config(page_title="CoreMind AI", layout="wide")
14
+
15
+ st.header("CoreMind AI")
16
+
17
+ st.sidebar.title("Options")
18
+
19
+ # Enter your OpenAI API key in the sidebar, or leave the field blank and set it in your environment variables as OPENAI_API_KEY
20
+ openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")
21
+
22
+
23
+ if openai_key:
24
+ os.environ["OPENAI_API_KEY"] = openai_key
25
+
26
+ # set up data
27
+
28
+ loader = TextLoader("raw_data.txt")
29
+
30
+ embeddings = OpenAIEmbeddings()
31
+
32
+ docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
33
+
34
+
35
+ qa_temperature = st.sidebar.slider("QA Temperature", min_value=0.0, max_value=2.0, value=0.5, step=0.01, key="temperature")
36
+ qa_model = st.sidebar.selectbox("QA Model", ["gpt-3.5-turbo"], key="model")
37
+
38
+ # st.sidebar.markdown("----")
39
+
40
+ # allow the user to insert new data
41
+ # new_data = st.sidebar.text_area("Add new data", key="new_data")
42
+ # if new_data:
43
+ # # save data to user_data
44
+ # with open("user_data/raw_data.txt", "a") as f:
45
+ # f.write(new_data)
46
+
47
+ # loader = TextLoader("user_data/raw_data.txt")
48
+ # documents = loader.load()
49
+ # text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=300)
50
+ # texts = text_splitter.split_documents(documents)
51
+ # docsearch.add_documents(texts)
52
+ # docsearch.persist()
53
+
54
+
55
+ # st.sidebar.markdown("----")
56
+ # choice = st.sidebar.selectbox("Example prompts", ["Compare the revenues and operating profits between 1996, 1995 and 1994", "Get me the common stock investments values and organise them in a format easy for plotting"])
57
+ # button = st.sidebar.button("Run Example Prompt")
58
+
59
+
60
+ qa = RetrievalQA.from_chain_type(
61
+ llm=OpenAI(temperature=qa_temperature, model_name=qa_model),
62
+ retriever=docsearch.as_retriever(),
63
+ # reduce_k_below_max_tokens=True
64
+ )
65
+
66
+
67
+ # if button:
68
+ # prompt = st.text_input("Enter your prompt", value=choice, key="prompt")
69
+ # else:
70
+
71
+
72
+ prompt = st.text_input("Enter your prompt", value="Imagine you are an analyst looking into Berkshire Hathaway's relationship with GEICO, write a 200 word summary on this aimed at impressing your boss.", key="prompt")
73
+
74
+ if prompt:
75
+ response = qa.run(prompt)
76
+ st.write(response)
77
+ # plot_this = st.button("Give me code to plot this")
78
+
79
+ # if plot_this:
80
+ # followup_llm = OpenAI(temperature=0, model_name="gpt-3.5-turbo")
81
+ # plot_code = followup_llm(f"create python code to plot this: {response}")
82
+ # st.code(plot_code)
83
+
84
+ # put the sources in an expander
85
+ with st.expander("Sources"):
86
+ retriever = docsearch.as_retriever()
87
+
88
+ # create a table of the top 3 most similar documents
89
+ similarity_table = retriever.vectorstore.similarity_search_with_score(prompt, k=3)
90
+ st.write(similarity_table)
91
+
92
+
93
+ # in an expander, show the documents as a UMAP
94
+ # with st.expander("Document Embeddings"):
95
+ # df = pd.read_parquet("/Users/hamish/Workspace/delphicai/data/chroma-embeddings.parquet")
96
+ # umap_mapper = umap.UMAP(n_neighbors=5, min_dist=0.1).fit(df.embedding.tolist())
97
+ # p = umap.plot.interactive(umap_mapper, point_size=5)
98
+
99
+ # # show the plot in streamlit
100
+ # st.bokeh_chart(p, use_container_width=True)
data/chroma-collections.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:187e9ab81a8ddc83a4197346733cf576aef5641b17a101e7cada20f766b063b0
3
+ size 557
data/chroma-embeddings.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b6274fd8c7d2b130d78f70fff66c190823083932ca0c189901ecb04a238e39
3
+ size 5468138
data/index/id_to_uuid_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad11d5913073c6bd04169cd9b66d8ef210bdefda127b1a285f011244f9955b82
3
+ size 6077
data/index/id_to_uuid_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d57feb0308972a3ab352191d913af01f63a834a44586d873e64e82ae9c78652a
3
+ size 14031
data/index/index_569e1be0-57b0-4ef4-abcc-a40cccd1b383.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8108b2b89235fcc744b9dada9ca7bd8e4b6268b8fba0c4c6de7c93c6b04779f1
3
+ size 1208276
data/index/index_68425e73-3b8d-47fd-9dd0-ceb0ad113836.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748f63e3c85ff8a065f50516ae0e9e7e0e5681f8b2927773e1234d450c81084f
3
+ size 2756144
data/index/index_metadata_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ada77f186fa63ea591d1166c8b2d5ba8b9883d0b5cd527e5d5b1f4a3ed64808
3
+ size 73
data/index/index_metadata_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:571d06c80abef5303ad067a2522a3e22982d39f33b1b03a0edc00e820fcf6170
3
+ size 74
data/index/uuid_to_id_569e1be0-57b0-4ef4-abcc-a40cccd1b383.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5b7aeb57bbd35985867677f88b1dc7af6e65386786a73d065dc2f117bf9e21
3
+ size 7120
data/index/uuid_to_id_68425e73-3b8d-47fd-9dd0-ceb0ad113836.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec75e2300d30997d654cb028c6f47bccf2e809ddd3dc703386ba9c0bf2142d07
3
+ size 16405