varun500 committed on
Commit
02f296a
1 Parent(s): e41d951

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -33
app.py CHANGED
@@ -59,11 +59,26 @@ def save_key(api_key):
59
  return api_key
60
 
61
 
62
- def query_pinecone(query, top_k, model, index):
63
  # generate embeddings for the query
64
  xq = model.encode([query]).tolist()
65
  # search pinecone index for context passage with the answer
66
- xc = index.query(xq, top_k=top_k, include_metadata=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return xc
68
 
69
 
@@ -96,13 +111,10 @@ def text_lookup(data, sentence_ids):
96
  return context
97
 
98
 
99
- def gpt3_summary(result, query):
100
- prompt = f"""Answer the question based on the following information:
101
- {result}
102
- Question: {query} """
103
  response = openai.Completion.create(
104
- model="text-ada-001",
105
- prompt= prompt,
106
  temperature=0.1,
107
  max_tokens=512,
108
  top_p=1.0,
@@ -126,77 +138,115 @@ def gpt3_qa(query, answer):
126
  return response.choices[0].text
127
 
128
 
129
- st.title("Abstractive Question Answering - APPL")
 
 
 
 
130
 
131
  query_text = st.text_input("Input Query", value="Who is the CEO of Apple?")
132
 
133
- num_results = int(st.number_input("Number of Results to query", 1, 5, value=2))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
 
136
  # Choose encoder model
137
 
138
- encoder_models_choice = ["MPNET", "SGPT"]
139
 
140
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
141
 
142
 
143
  # Choose decoder model
144
 
145
- decoder_models_choice = ["GPT3 (QA_davinci)", "GPT3 (text_davinci)", "T5", "FLAN-T5"]
146
 
147
  decoder_model = st.selectbox("Select Decoder Model", decoder_models_choice)
148
 
149
 
150
  if encoder_model == "MPNET":
151
  # Connect to pinecone environment
152
- pinecone.init(
153
- api_key="ea9fd320-6f8a-4edd-bf41-9e972b95cbf9", environment="us-east1-gcp"
154
- )
155
  pinecone_index_name = "week2-all-mpnet-base"
156
  pinecone_index = pinecone.Index(pinecone_index_name)
157
  retriever_model = get_mpnet_embedding_model()
158
 
159
  elif encoder_model == "SGPT":
160
  # Connect to pinecone environment
161
- pinecone.init(
162
- api_key="0d8215d7-4ad5-4c76-8c45-4a40c0f6a1b7", environment="us-east1-gcp"
163
- )
164
  pinecone_index_name = "week2-sgpt-125m"
165
  pinecone_index = pinecone.Index(pinecone_index_name)
166
  retriever_model = get_sgpt_embedding_model()
167
 
168
 
169
- query_results = query_pinecone(query_text, num_results, retriever_model, pinecone_index)
170
 
171
- window = int(st.number_input("Sentence Window Size", 1, 3, value=1))
 
 
 
 
172
 
173
  data = get_data()
174
 
175
- # context_list = format_query(query_results)
176
- context_list = sentence_id_combine(data, query_results, lag=window)
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
 
179
  st.subheader("Answer:")
180
 
181
 
182
- if decoder_model == "GPT3 (text_davinci)":
183
  openai_key = st.text_input(
184
  "Enter OpenAI key",
185
- value="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",
186
  type="password",
187
  )
188
  api_key = save_key(openai_key)
189
  openai.api_key = api_key
190
- #output_text = []
191
- #for context_text in context_list:
192
- # output_text.append(gpt3_summary(context_text))
193
- #generated_text = ". ".join(output_text)
194
- st.write(gpt3_summary(context_list,query_text))
195
 
196
  elif decoder_model == "GPT3 (QA_davinci)":
197
  openai_key = st.text_input(
198
  "Enter OpenAI key",
199
- value="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",
200
  type="password",
201
  )
202
  api_key = save_key(openai_key)
@@ -226,8 +276,6 @@ elif decoder_model == "FLAN-T5":
226
  show_retrieved_text = st.checkbox("Show Retrieved Text", value=False)
227
 
228
  if show_retrieved_text:
229
-
230
  st.subheader("Retrieved Text:")
231
-
232
  for context_text in context_list:
233
- st.markdown(f"- {context_text}")
 
59
  return api_key
60
 
61
 
62
def query_pinecone(query, top_k, model, index, year, quarter, ticker, threshold=0.5):
    """Retrieve context passages for ``query`` from a Pinecone index.

    The query text is embedded with ``model``, the index is searched with a
    metadata filter on year / quarter / ticker, and any match whose score is
    below ``threshold`` is dropped from the response.

    Args:
        query: Question text to embed.
        top_k: Maximum number of matches requested from the index.
        model: Encoder exposing ``encode(list_of_str)`` whose result
            provides ``.tolist()``.
        index: Index handle exposing ``query(...)``.
        year: Year to filter on; coerced with ``int()`` so a string such as
            ``"2019"`` is accepted.
        quarter: Value matched exactly against the ``"Quarter"`` metadata.
        ticker: Value matched exactly against the ``"Ticker"`` metadata.
        threshold: Minimum score (inclusive) a match needs to be kept.

    Returns:
        The response returned by ``index.query``, with its ``"matches"``
        entry replaced by the filtered list (the response is updated in
        place and also returned).
    """
    # generate embeddings for the query
    xq = model.encode([query]).tolist()
    # search pinecone index for context passage with the answer
    xc = index.query(
        xq,
        top_k=top_k,
        filter={
            "Year": int(year),
            "Quarter": {"$eq": quarter},
            "Ticker": {"$eq": ticker},
        },
        include_metadata=True,
    )
    # keep only the context passages that reach the score threshold
    xc["matches"] = [match for match in xc["matches"] if match["score"] >= threshold]
    return xc
83
 
84
 
 
111
  return context
112
 
113
 
114
+ def gpt3_summary(text):
 
 
 
115
  response = openai.Completion.create(
116
+ model="text-davinci-003",
117
+ prompt=text + "\n\nTl;dr",
118
  temperature=0.1,
119
  max_tokens=512,
120
  top_p=1.0,
 
138
  return response.choices[0].text
139
 
140
 
141
+ st.title("Abstractive Question Answering")
142
+
143
+ st.write(
144
+ "The app uses the quarterly earnings call transcripts for 10 companies (Apple, AMD, Amazon, Cisco, Google, Microsoft, Nvidia, ASML, Intel, Micron) for the years 2016 to 2020."
145
+ )
146
 
147
  query_text = st.text_input("Input Query", value="Who is the CEO of Apple?")
148
 
149
+ years_choice = ["2016", "2017", "2018", "2019", "2020"]
150
+
151
+ year = st.selectbox("Year", years_choice)
152
+
153
+ quarter = st.selectbox("Quarter", ["Q1", "Q2", "Q3", "Q4"])
154
+
155
+ ticker_choice = [
156
+ "AAPL",
157
+ "CSCO",
158
+ "MSFT",
159
+ "ASML",
160
+ "NVDA",
161
+ "GOOGL",
162
+ "MU",
163
+ "INTC",
164
+ "AMZN",
165
+ "AMD",
166
+ ]
167
+
168
+ ticker = st.selectbox("Company", ticker_choice)
169
+
170
+ num_results = int(st.number_input("Number of Results to query", 1, 5, value=3))
171
 
172
 
173
  # Choose encoder model
174
 
175
+ encoder_models_choice = ["SGPT", "MPNET"]
176
 
177
  encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
178
 
179
 
180
  # Choose decoder model
181
 
182
+ decoder_models_choice = ["FLAN-T5", "T5", "GPT3 (QA_davinci)", "GPT3 (summary_davinci)"]
183
 
184
  decoder_model = st.selectbox("Select Decoder Model", decoder_models_choice)
185
 
186
 
187
  if encoder_model == "MPNET":
188
  # Connect to pinecone environment
189
+ pinecone.init(api_key=st.secrets["pinecone_mpnet"], environment="us-east1-gcp")
 
 
190
  pinecone_index_name = "week2-all-mpnet-base"
191
  pinecone_index = pinecone.Index(pinecone_index_name)
192
  retriever_model = get_mpnet_embedding_model()
193
 
194
  elif encoder_model == "SGPT":
195
  # Connect to pinecone environment
196
+ pinecone.init(api_key=st.secrets["pinecone_sgpt"], environment="us-east1-gcp")
 
 
197
  pinecone_index_name = "week2-sgpt-125m"
198
  pinecone_index = pinecone.Index(pinecone_index_name)
199
  retriever_model = get_sgpt_embedding_model()
200
 
201
 
202
+ window = int(st.number_input("Sentence Window Size", 0, 3, value=0))
203
 
204
+ threshold = float(
205
+ st.number_input(
206
+ label="Similarity Score Threshold", step=0.05, format="%.2f", value=0.55
207
+ )
208
+ )
209
 
210
  data = get_data()
211
 
212
+ query_results = query_pinecone(
213
+ query_text,
214
+ num_results,
215
+ retriever_model,
216
+ pinecone_index,
217
+ year,
218
+ quarter,
219
+ ticker,
220
+ threshold,
221
+ )
222
+
223
+ if threshold <= 0.60:
224
+ context_list = sentence_id_combine(data, query_results, lag=window)
225
+ else:
226
+ context_list = format_query(query_results)
227
 
228
 
229
  st.subheader("Answer:")
230
 
231
 
232
+ if decoder_model == "GPT3 (summary_davinci)":
233
  openai_key = st.text_input(
234
  "Enter OpenAI key",
235
+ value=st.secrets["openai_key"],
236
  type="password",
237
  )
238
  api_key = save_key(openai_key)
239
  openai.api_key = api_key
240
+ output_text = []
241
+ for context_text in context_list:
242
+ output_text.append(gpt3_summary(context_text))
243
+ generated_text = ". ".join(output_text)
244
+ st.write(gpt3_summary(generated_text))
245
 
246
  elif decoder_model == "GPT3 (QA_davinci)":
247
  openai_key = st.text_input(
248
  "Enter OpenAI key",
249
+ value=st.secrets["openai_key"],
250
  type="password",
251
  )
252
  api_key = save_key(openai_key)
 
276
  show_retrieved_text = st.checkbox("Show Retrieved Text", value=False)
277
 
278
  if show_retrieved_text:
 
279
  st.subheader("Retrieved Text:")
 
280
  for context_text in context_list:
281
+ st.markdown(f"- {context_text}")