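# Streamlit app: semantic search over Sean Carroll's "The Biggest Ideas in the
# Universe" video series, with optional GPT-3 summaries of the retrieved context.
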
import streamlit as st
import pinecone
from sentence_transformers import SentenceTransformer
import logging
import openai
PINECONE_KEY = st.secrets["PINECONE_KEY"] # app.pinecone.io
OPENAI_KEY = st.secrets["OPENAI_KEY"]
INDEX_ID = 'sean-carrol-biggest-ideas-of-the-universe'
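
# cache heavyweight resources (API key setup, index connection, embedding model)
# across Streamlit reruns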
@st.experimental_singleton
def init_openai():
    openai.api_key = OPENAI_KEY


@st.experimental_singleton
def init_pinecone():
    pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
    return pinecone.Index(INDEX_ID)


@st.experimental_singleton
def init_retriever():
    return SentenceTransformer("multi-qa-mpnet-base-dot-v1")
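
# embed the query and search the Pinecone index, retrying with a fresh
# connection (up to three times) if the query fails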
def make_query(query, retriever, top_k=3, include_values=True, include_metadata=True, filter=None):
    xq = retriever.encode([query]).tolist()
    logging.info(f"Query: {query}")
    attempt = 0
    matches = []
    while attempt < 3:
        try:
            xc = st.session_state.index.query(
                xq,
                top_k=top_k,
                include_values=include_values,
                include_metadata=include_metadata,
                filter=filter
            )
            matches = xc['matches']
            break
        except Exception:
            # force reload of the Pinecone connection, then retry
            pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
            st.session_state.index = pinecone.Index(INDEX_ID)
            attempt += 1
            matches = []
    if len(matches) == 0:
        logging.error("Query failed")
    return matches
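
# build the GPT-3 prompt from the retrieved transcript snippets, keeping the
# combined context under an approximate character limit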
def get_prompt(query, matches):
    contexts = [
        x['metadata']['text'] for x in matches
    ]
    prompt_start = (
        "Answer the question based on the context below.\n\n" +
        "Context:\n"
    )
    prompt_end = (
        f"\n\nQuestion: {query}\nAnswer:"
    )
    # approximate character budget for the context portion of the prompt
    limit = 3750
    # default to using every retrieved context, then truncate below if needed
    prompt = (
        prompt_start +
        "\n\n--\n\n".join(contexts) +
        prompt_end
    )
    for i in range(1, len(contexts)):
        if len("\n\n--\n\n".join(contexts[:i])) >= limit:
            prompt = (
                prompt_start +
                "\n\n--\n\n".join(contexts[:i-1]) +
                prompt_end
            )
            break
    return prompt
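
# initialise the OpenAI key, Pinecone index connection, and retriever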
init_openai()
st.session_state.index = init_pinecone()
retriever = init_retriever()
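
# render a single video result as an HTML card: thumbnail, title, and
# timestamped transcript snippets linking to the matching moments in the video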
def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
    meta = [(e, s, u, c) for e, s, u, c in zip(ends, starts, urls, contexts)]
    # sort snippets by end time so they are processed in chronological order
    meta.sort(reverse=False)
    text_content = []
    current_start = 0
    current_end = 0
    for end, start, url, context in meta:
        # reformat the start time in seconds to a mm:ss timestamp
        mins, secs = divmod(int(start), 60)
        timestamp = f"{mins:02d}:{secs:02d}"
        if start < current_end and start > current_start:
            # this snippet continues the previous one, so trim the overlapping
            # text from the previous entry before appending
            text_content[-1][0] = text_content[-1][0].split(context[:10])[0]
            text_content.append([f"[{timestamp}] {context.capitalize()}", url])
        else:
            text_content.append(["xxLINEBREAKxx", ""])
            text_content.append([f"[{timestamp}] {context}", url])
        current_start = start
        current_end = end
    html_text = ""
    for text, url in text_content:
        if text == "xxLINEBREAKxx":
            html_text += "<br>"
        else:
            html_text += f"<small><a href={url}>{text.strip()}... </a></small>"
            print(text)
    html = f"""
    <div class="container-fluid">
        <div class="row align-items-start">
            <div class="col-md-4 col-sm-4">
                <div class="position-relative">
                    <a href={urls[0]}><img src={thumbnail} class="img-fluid" style="width: 192px; height: 106px"></a>
                </div>
            </div>
            <div class="col-md-8 col-sm-8">
                <h2>{title}</h2>
            </div>
            <div>
                {html_text}
            </div>
        </div>
    </div>
    <br><br>
    """
    return st.markdown(html, unsafe_allow_html=True)
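
# YouTube channel IDs, used only by the commented-out channel filter below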
channel_map = {
    'James Briggs': 'UCv83tO5cePwHMt1952IVVHw',
    'Daniel Bourke': 'UCr8O8l5cCX85Oem1d18EezQ',
    'Yannic Kilcher': 'UCZHmQk67mSJgfCCTn7xBfew',
    'AI Coffee Break with Letitia': 'UCobqgqE4i5Kf7wrxRxhToQA',
    'sentdex': 'UCfzlCWGWYyIQ0aLC5w48gBQ'
}
st.write("""
# Sean Carroll Explains
""")
st.info("""
Ask any question about Sean Carroll's video series 'The Biggest Ideas in the Universe'.
The search is built with OpenAI's Whisper, SentenceTransformer, GPT-3, and Pinecone, and is based on James Briggs's [example](https://pinecone.io/learn/openai-whisper)!
""")
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)
query = st.text_input("Ask about the universe...", "")
st.checkbox("Generate summary with GPT-3?", key="summarize")
# with st.expander("Advanced Options"):
# channel_options = st.multiselect(
# 'Channels to Search',
# ['James Briggs', 'Daniel Bourke', 'Yannic Kilcher', 'AI Coffee Break with Letitia', 'sentdex'],
# ['James Briggs', 'Daniel Bourke', 'Yannic Kilcher', 'AI Coffee Break with Letitia', 'sentdex']
# )
if query != "":
    # channels = [channel_map[name] for name in channel_options]
    print(f"query: {query}")
    matches = make_query(
        query, retriever, top_k=5,
        # filter={
        #     'channel_id': {'$in': channels}
        # }
    )
    if st.session_state.summarize:
        # build a prompt from the retrieved contexts and summarise with GPT-3
        prompt = get_prompt(query, matches)
        res = openai.Completion.create(
            engine='text-davinci-003',
            prompt=prompt,
            temperature=0,
            max_tokens=300,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=".",
        )
        summary = res['choices'][0]['text'].strip()
        st.info(f"Summary:\n{summary}")
    results = {}
    order = []
    for context in matches:
        video_id = context['metadata']['url'].split('/')[-1]
        if video_id not in results:
            results[video_id] = {
                'title': context['metadata']['title'],
                'urls': [f"{context['metadata']['url']}?t={int(context['metadata']['start'])}"],
                'contexts': [context['metadata']['text']],
                'starts': [int(context['metadata']['start'])],
                'ends': [int(context['metadata']['end'])]
            }
            order.append(video_id)
        else:
            results[video_id]['urls'].append(
                f"{context['metadata']['url']}?t={int(context['metadata']['start'])}"
            )
            results[video_id]['contexts'].append(
                context['metadata']['text']
            )
            results[video_id]['starts'].append(int(context['metadata']['start']))
            results[video_id]['ends'].append(int(context['metadata']['end']))
    # now display cards
    for video_id in order:
        card(
            thumbnail=f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg",
            title=results[video_id]['title'],
            urls=results[video_id]['urls'],
            contexts=results[video_id]['contexts'],
            starts=results[video_id]['starts'],
            ends=results[video_id]['ends']
        )