snrspeaks committed on
Commit
116bc23
•
1 Parent(s): 59f2d94

Upload 7 files

Browse files
Files changed (6) hide show
  1. .gitattributes +2 -0
  2. 10KGPTLogo.png +0 -0
  3. Dockerfile +11 -0
  4. app.py +154 -0
  5. chainlit.md +0 -0
  6. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
37
+ data/** filter=lfs diff=lfs merge=lfs -text
10KGPTLogo.png ADDED
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Run as an unprivileged user with UID 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Copy only the dependency manifest first so the pip layer is reused when just
# the application code changes. The destination is spelled with $HOME because
# Docker does not expand "~" in COPY paths.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application (code, logo, data/ stores) owned by the runtime user so
# the process can create files inside its working directory.
COPY --chown=user . $HOME/app

CMD ["chainlit", "run", "app.py", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings.openai import OpenAIEmbeddings
2
+ from langchain.vectorstores import FAISS
3
+ from langchain.chains import RetrievalQA
4
+ from langchain.chat_models import ChatOpenAI
5
+ from langchain.document_loaders import PyMuPDFLoader
6
+ import os
7
+ from langchain.agents import initialize_agent, Tool
8
+ from langchain.agents import AgentType
9
+ import chainlit as cl
10
+
11
@cl.on_chat_start
async def start():
    """Send the two welcome messages when a chat session starts.

    The first message carries the greeting together with the inline 10K-GPT
    logo; the second describes which Form 10-K filings can be queried.
    """
    welcome_message1 = "Hey, Welcome to **10K-GPT**!"
    welcome_message2 = "**10K-GPT** is designed to provide you with an interactive, user-friendly interface that lets you pose queries and fetch answers from Form 10-K documents of some of the world's leading tech giants:\n- **Meta**\n- **Amazon**\n- **Alphabet**\n- **Apple**\n- **Microsoft**,\n\nfor the years **2022, 2021, and 2020**.\n\nPlease ask a question to begin!"

    # The logo is attached to the greeting as an inline image element.
    elements = [
        cl.Image(path="10KGPTLogo.png", name="10K-GPT", display="inline"),
    ]
    await cl.Message(content=welcome_message1, elements=elements).send()
    await cl.Message(content=welcome_message2).send()
23
+
24
@cl.langchain_factory(use_async=False)
def load():
    """Build the LangChain agent that answers questions over the 10-K stores.

    For every (company, year) pair a FAISS index is loaded from
    ``data/datastores/<prefix>_<year>``, wrapped in a "stuff" RetrievalQA
    chain that retrieves the top 5 chunks, and exposed to the agent as a
    ``Tool``.

    Returns:
        The agent executor produced by ``initialize_agent`` using the
        zero-shot ReAct description agent over all 15 tools.
    """
    embeddings = OpenAIEmbeddings()
    llm = ChatOpenAI(temperature=0, model="gpt-4", streaming=True)

    # (name used in the tool title, datastore directory prefix,
    #  name used in the tool description). Order defines the tool order.
    companies = [
        ("Apple", "apple", "Apple"),
        ("Microsoft", "msft", "Microsoft"),
        ("Meta", "meta", "Meta"),
        ("Alphabet", "alphbt", "Alphabet or Google"),
        ("Amazon", "amzn", "Amazon"),
    ]
    years = (2022, 2021, 2020)

    tools = []
    for title_name, store_prefix, described_as in companies:
        for year in years:
            # Build the path with os.path.join: a hard-coded backslash path
            # is a single (non-existent) file name on Linux, which is what
            # the Docker image runs on.
            store_dir = os.path.join(
                "data", "datastores", f"{store_prefix}_{year}"
            )
            # NOTE(review): load_local appears to unpickle the stored
            # docstore -- only load indices that ship with this repo.
            docs_store = FAISS.load_local(store_dir, embeddings)
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=docs_store.as_retriever(search_kwargs={"k": 5}),
            )
            tools.append(
                Tool(
                    name=f"{title_name} Form 10K {year}",
                    func=qa_chain.run,
                    description=(
                        "useful when you need to answer from "
                        f"{described_as} {year}"
                    ),
                )
            )

    # Construct the agent. We will use the default agent type here.
    # See documentation for a full list of options.
    return initialize_agent(
        tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
    )
chainlit.md ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
langchain
openai
chromadb
pypdf
tiktoken
chainlit
# Backend for langchain.vectorstores.FAISS, which app.py uses to load the indices.
faiss-cpu