Keane Moraes committed on
Commit
e9d1d9f
1 Parent(s): f76e4eb

fixed redundancy with q&a

Browse files
Files changed (1) hide show
  1. app.py +79 -80
app.py CHANGED
@@ -105,6 +105,7 @@ def generate_text_chunks_lib():
105
  key_engine = Keywords(title_entry)
106
  keywords = key_engine.get_keywords(text_chunks_lib)
107
 
 
108
  # =========== SIDEBAR FOR GENERATION ===========
109
  with st.sidebar:
110
  youtube_link = st.text_input(label = "Type in your Youtube link", placeholder = "", key="url")
@@ -258,88 +259,86 @@ with tab5:
258
 
259
  # =========== QUERY BOT ===========
260
  with tab6:
261
- if is_completed_analysis:
262
- if 'generated' not in st.session_state:
263
- st.session_state['generated'] = []
264
-
265
- if 'past' not in st.session_state:
266
- st.session_state['past'] = []
267
-
268
- def get_text():
269
- st.header("Ask me something about the video:")
270
- input_text = st.text_input("You: ", key="prompt")
271
- return input_text
272
-
273
-
274
- def get_embedding_text(prompt):
275
- response = openai.Embedding.create(
276
- input= prompt.strip(),
277
- model="text-embedding-ada-002"
278
- )
279
- q_embedding = response['data'][0]['embedding']
280
- print("the folder name at got here 1.5 is ", folder_name)
281
- # df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
282
- data['embedding'] = data['embedding'].apply(eval).apply(np.array)
283
-
284
- data['distances'] = distances_from_embeddings(q_embedding, data['embedding'].values, distance_metric='cosine')
285
- returns = []
286
-
287
- # Sort by distance with 2 hints
288
- for i, row in data.sort_values('distances', ascending=True).head(4).iterrows():
289
- # Else add it to the text that is being returned
290
- returns.append(row["text"])
291
-
292
- # Return the context
293
- return "\n\n###\n\n".join(returns)
294
-
295
- def generate_response(prompt):
296
- one_shot_prompt = '''
297
- I am YoutubeGPT, a highly intelligent question answering bot.
298
- If you ask me a question that is rooted in truth, I will give you the answer.
299
- Q: What is human life expectancy in the United States?
300
- A: Human life expectancy in the United States is 78 years.
301
- Q: '''+prompt+'''
302
- A:
303
- '''
304
- completions = openai.Completion.create(
305
- engine = "text-davinci-003",
306
- prompt = one_shot_prompt,
307
- max_tokens = 1024,
308
- n = 1,
309
- stop=["Q:"],
310
- temperature=0.5,
311
- )
312
- message = completions.choices[0].text
313
- return message
314
-
315
- if is_completed_analysis:
316
- user_input = get_text()
317
- print("user input is ", user_input)
318
- print("the folder name at got here 0.5 is ", folder_name)
319
- else:
320
- user_input = None
321
 
322
- if 'messages' not in st.session_state:
323
- st.session_state['messages'] = get_initial_message()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
- if user_input:
326
- print("got here 1")
327
- print("the folder name at got here 1.5 is ", folder_name)
328
- text_embedding = get_embedding_text(user_input)
329
- print("the folder name at got here 1.5 is ", folder_name)
330
- print("got here 2")
331
- title = data_transcription['title']
332
- string_title = "\n\n###\n\n".join(title)
333
- user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
334
- print("got here 3")
335
- output = generate_response(user_input_embedding)
336
- st.session_state.past.append(user_input)
337
- st.session_state.generated.append(output)
338
-
339
- if st.session_state['generated']:
340
- for i in range(len(st.session_state['generated'])-1, -1, -1):
341
- message(st.session_state["generated"][i], key=str(i))
342
- message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
343
 
344
 
345
  # st.header("What else")
 
105
  key_engine = Keywords(title_entry)
106
  keywords = key_engine.get_keywords(text_chunks_lib)
107
 
108
+
109
  # =========== SIDEBAR FOR GENERATION ===========
110
  with st.sidebar:
111
  youtube_link = st.text_input(label = "Type in your Youtube link", placeholder = "", key="url")
 
259
 
260
  # =========== QUERY BOT ===========
261
  with tab6:
262
+
263
+ if 'generated' not in st.session_state:
264
+ st.session_state['generated'] = []
265
+
266
+ if 'past' not in st.session_state:
267
+ st.session_state['past'] = []
268
+
269
+ def get_text():
270
+ st.header("Ask me something about the video:")
271
+ input_text = st.text_input("You: ", key="prompt")
272
+ return input_text
273
+
274
+
275
+ def get_embedding_text(prompt):
276
+ response = openai.Embedding.create(
277
+ input= prompt.strip(),
278
+ model="text-embedding-ada-002"
279
+ )
280
+ q_embedding = response['data'][0]['embedding']
281
+ print("the folder name at got here 1.5 is ", folder_name)
282
+ # df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
283
+ data['embedding'] = data['embedding'].apply(eval).apply(np.array)
284
+
285
+ data['distances'] = distances_from_embeddings(q_embedding, data['embedding'].values, distance_metric='cosine')
286
+ returns = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ # Sort by distance with 2 hints
289
+ for i, row in data.sort_values('distances', ascending=True).head(4).iterrows():
290
+ # Else add it to the text that is being returned
291
+ returns.append(row["text"])
292
+
293
+ # Return the context
294
+ return "\n\n###\n\n".join(returns)
295
+
296
+ def generate_response(prompt):
297
+ one_shot_prompt = '''
298
+ I am YoutubeGPT, a highly intelligent question answering bot.
299
+ If you ask me a question that is rooted in truth, I will give you the answer.
300
+ Q: What is human life expectancy in the United States?
301
+ A: Human life expectancy in the United States is 78 years.
302
+ Q: '''+prompt+'''
303
+ A:
304
+ '''
305
+ completions = openai.Completion.create(
306
+ engine = "text-davinci-003",
307
+ prompt = one_shot_prompt,
308
+ max_tokens = 1024,
309
+ n = 1,
310
+ stop=["Q:"],
311
+ temperature=0.5,
312
+ )
313
+ message = completions.choices[0].text
314
+ return message
315
+
316
+
317
+ user_input = get_text()
318
+ print("user input is ", user_input)
319
+ print("the folder name at got here 0.5 is ", folder_name)
320
+
321
+ # if 'messages' not in st.session_state:
322
+ # st.session_state['messages'] = get_initial_message()
323
+
324
+ if user_input:
325
+ print("got here 1")
326
+ print("the folder name at got here 1.5 is ", folder_name)
327
+ text_embedding = get_embedding_text(user_input)
328
+ print("the folder name at got here 1.5 is ", folder_name)
329
+ print("got here 2")
330
+ title = data_transcription['title']
331
+ string_title = "\n\n###\n\n".join(title)
332
+ user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
333
+ print("got here 3")
334
+ output = generate_response(user_input_embedding)
335
+ st.session_state.past.append(user_input)
336
+ st.session_state.generated.append(output)
337
 
338
+ if st.session_state['generated']:
339
+ for i in range(len(st.session_state['generated'])-1, -1, -1):
340
+ message(st.session_state["generated"][i], key=str(i))
341
+ message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
343
 
344
  # st.header("What else")