Keane Moraes committed on
Commit
2abcb58
1 Parent(s): 2b58524

changes to q&a and mindmap variables

Browse files
Files changed (1) hide show
  1. app.py +84 -121
app.py CHANGED
@@ -42,42 +42,8 @@ takeaways = []
42
  folder_name = "./tests"
43
  input_accepted = False
44
  is_completed_analysis = False
45
-
46
- def get_initial_message():
47
- messages=[
48
- {"role": "system", "content": "You are a helpful AI Tutor. Who anwers brief questions about AI."},
49
- {"role": "user", "content": "I want to learn AI"},
50
- {"role": "assistant", "content": "Thats awesome, what do you want to know aboout AI"}
51
- ]
52
- return messages
53
-
54
- nodes = []
55
- edges = []
56
-
57
- nodes.append( Node(id="Spiderman",
58
- label="Peter Parker",
59
- size=25,
60
- shape="circularImage",
61
- image="http://marvel-force-chart.surge.sh/marvel_force_chart_img/top_spiderman.png")
62
- ) # includes **kwargs
63
- nodes.append( Node(id="Captain_Marvel",
64
- size=25,
65
- shape="circularImage",
66
- image="http://marvel-force-chart.surge.sh/marvel_force_chart_img/top_captainmarvel.png")
67
- )
68
- edges.append( Edge(source="Captain_Marvel",
69
- label="friend_of",
70
- target="Spiderman",
71
- )
72
- )
73
-
74
- config = Config(width=750,
75
- height=950,
76
- directed=True,
77
- physics=True,
78
- hierarchical=False,
79
- )
80
-
81
 
82
  user_secret = os.getenv("OPENAI_API_KEY")
83
 
@@ -151,10 +117,6 @@ with st.sidebar:
151
  else:
152
  st.error("Please type in your youtube link or upload the PDF")
153
  st.experimental_rerun()
154
-
155
- # Save the transcript information
156
- with open(f"{folder_name}/data_transcription.json", "w") as f:
157
- json.dump(data_transcription, f, indent=4)
158
 
159
  # Generate embeddings
160
  if not os.path.exists(f"{folder_name}/word_embeddings.csv"):
@@ -227,11 +189,11 @@ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["Introduction", "Summary", "Transc
227
 
228
  # =========== INTRODUCTION ===========
229
  with tab1:
230
- st.subheader("Introduction")
231
  st.markdown("## How do I use this?")
232
  st.markdown("Do one of the following")
233
  st.markdown('* Type in your youtube URL that you want worked on')
234
  st.markdown('* Place the PDF file that you want worked on')
 
235
  st.markdown("**Once the file / url has finished saving, a 'Start Analysis' button will appear. Click on this button to begin the note generation**")
236
  st.warning("NOTE: This is just a demo product in alpha testing. Any and all bugs will soon be fixed")
237
  st.warning("After the note taking is done, you will see multiple tabs for more information")
@@ -278,88 +240,89 @@ with tab5:
278
  st.warning("Please wait for the analysis to finish")
279
 
280
  # =========== QUERY BOT ===========
281
- with tab6:
282
- if 'generated' not in st.session_state:
283
- st.session_state['generated'] = []
284
-
285
- if 'past' not in st.session_state:
286
- st.session_state['past'] = []
287
-
288
- def get_text():
289
- st.header("Ask me something about the video:")
290
- input_text = st.text_input("You: ", key="prompt")
291
- return input_text
292
-
293
-
294
- def get_embedding_text(prompt):
295
- response = openai.Embedding.create(
296
- input= prompt.strip(),
297
- model="text-embedding-ada-002"
298
- )
299
- q_embedding = response['data'][0]['embedding']
300
- print("the folder name at got here 1.5 is ", folder_name)
301
- df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
302
- df['embedding'] = df['embedding'].apply(eval).apply(np.array)
303
-
304
- df['distances'] = distances_from_embeddings(q_embedding, df['embedding'].values, distance_metric='cosine')
305
- returns = []
306
-
307
- # Sort by distance with 2 hints
308
- for i, row in df.sort_values('distances', ascending=True).head(4).iterrows():
309
- # Else add it to the text that is being returned
310
- returns.append(row["text"])
311
-
312
- # Return the context
313
- return "\n\n###\n\n".join(returns)
314
-
315
- def generate_response(prompt):
316
- one_shot_prompt = '''
317
- I am YoutubeGPT, a highly intelligent question answering bot.
318
- If you ask me a question that is rooted in truth, I will give you the answer.
319
- Q: What is human life expectancy in the United States?
320
- A: Human life expectancy in the United States is 78 years.
321
- Q: '''+prompt+'''
322
- A:
323
- '''
324
- completions = openai.Completion.create(
325
- engine = "text-davinci-003",
326
- prompt = one_shot_prompt,
327
- max_tokens = 1024,
328
- n = 1,
329
- stop=["Q:"],
330
- temperature=0.5,
331
- )
332
- message = completions.choices[0].text
333
- return message
334
-
335
  if is_completed_analysis:
336
- user_input = get_text()
337
- print("user input is ", user_input)
338
- print("the folder name at got here 0.5 is ", folder_name)
339
- else:
340
- user_input = None
341
-
342
- if 'messages' not in st.session_state:
343
- st.session_state['messages'] = get_initial_message()
344
-
345
- if user_input:
346
- print("got here 1")
347
- print("the folder name at got here 1.5 is ", folder_name)
348
- text_embedding = get_embedding_text(user_input)
349
- print("the folder name at got here 1.5 is ", folder_name)
350
- print("got here 2")
351
- title = data_transcription['title']
352
- string_title = "\n\n###\n\n".join(title)
353
- user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
354
- print("got here 3")
355
- output = generate_response(user_input_embedding)
356
- st.session_state.past.append(user_input)
357
- st.session_state.generated.append(output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
- if st.session_state['generated']:
360
- for i in range(len(st.session_state['generated'])-1, -1, -1):
361
- message(st.session_state["generated"][i], key=str(i))
362
- message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
 
365
  # st.header("What else")
 
42
  folder_name = "./tests"
43
  input_accepted = False
44
  is_completed_analysis = False
45
+ if not os.path.exists(folder_name):
46
+ os.mkdir(folder_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  user_secret = os.getenv("OPENAI_API_KEY")
49
 
 
117
  else:
118
  st.error("Please type in your youtube link or upload the PDF")
119
  st.experimental_rerun()
 
 
 
 
120
 
121
  # Generate embeddings
122
  if not os.path.exists(f"{folder_name}/word_embeddings.csv"):
 
189
 
190
  # =========== INTRODUCTION ===========
191
  with tab1:
 
192
  st.markdown("## How do I use this?")
193
  st.markdown("Do one of the following")
194
  st.markdown('* Type in your youtube URL that you want worked on')
195
  st.markdown('* Place the PDF file that you want worked on')
196
+ st.markdown('* Place the audio file that you want worked on')
197
  st.markdown("**Once the file / url has finished saving, a 'Start Analysis' button will appear. Click on this button to begin the note generation**")
198
  st.warning("NOTE: This is just a demo product in alpha testing. Any and all bugs will soon be fixed")
199
  st.warning("After the note taking is done, you will see multiple tabs for more information")
 
240
  st.warning("Please wait for the analysis to finish")
241
 
242
  # =========== QUERY BOT ===========
243
+ with tab6:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  if is_completed_analysis:
245
+ if 'generated' not in st.session_state:
246
+ st.session_state['generated'] = []
247
+
248
+ if 'past' not in st.session_state:
249
+ st.session_state['past'] = []
250
+
251
+ def get_text():
252
+ st.header("Ask me something about the video:")
253
+ input_text = st.text_input("You: ", key="prompt")
254
+ return input_text
255
+
256
+
257
+ def get_embedding_text(prompt):
258
+ response = openai.Embedding.create(
259
+ input= prompt.strip(),
260
+ model="text-embedding-ada-002"
261
+ )
262
+ q_embedding = response['data'][0]['embedding']
263
+ print("the folder name at got here 1.5 is ", folder_name)
264
+ # df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
265
+ data['embedding'] = data['embedding'].apply(eval).apply(np.array)
266
+
267
+ data['distances'] = distances_from_embeddings(q_embedding, data['embedding'].values, distance_metric='cosine')
268
+ returns = []
269
+
270
+ # Sort by distance with 2 hints
271
+ for i, row in data.sort_values('distances', ascending=True).head(4).iterrows():
272
+ # Else add it to the text that is being returned
273
+ returns.append(row["text"])
274
+
275
+ # Return the context
276
+ return "\n\n###\n\n".join(returns)
277
+
278
+ def generate_response(prompt):
279
+ one_shot_prompt = '''
280
+ I am YoutubeGPT, a highly intelligent question answering bot.
281
+ If you ask me a question that is rooted in truth, I will give you the answer.
282
+ Q: What is human life expectancy in the United States?
283
+ A: Human life expectancy in the United States is 78 years.
284
+ Q: '''+prompt+'''
285
+ A:
286
+ '''
287
+ completions = openai.Completion.create(
288
+ engine = "text-davinci-003",
289
+ prompt = one_shot_prompt,
290
+ max_tokens = 1024,
291
+ n = 1,
292
+ stop=["Q:"],
293
+ temperature=0.5,
294
+ )
295
+ message = completions.choices[0].text
296
+ return message
297
+
298
+ if is_completed_analysis:
299
+ user_input = get_text()
300
+ print("user input is ", user_input)
301
+ print("the folder name at got here 0.5 is ", folder_name)
302
+ else:
303
+ user_input = None
304
 
305
+ if 'messages' not in st.session_state:
306
+ st.session_state['messages'] = get_initial_message()
307
+
308
+ if user_input:
309
+ print("got here 1")
310
+ print("the folder name at got here 1.5 is ", folder_name)
311
+ text_embedding = get_embedding_text(user_input)
312
+ print("the folder name at got here 1.5 is ", folder_name)
313
+ print("got here 2")
314
+ title = data_transcription['title']
315
+ string_title = "\n\n###\n\n".join(title)
316
+ user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
317
+ print("got here 3")
318
+ output = generate_response(user_input_embedding)
319
+ st.session_state.past.append(user_input)
320
+ st.session_state.generated.append(output)
321
+
322
+ if st.session_state['generated']:
323
+ for i in range(len(st.session_state['generated'])-1, -1, -1):
324
+ message(st.session_state["generated"][i], key=str(i))
325
+ message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
326
 
327
 
328
  # st.header("What else")