Spaces:

arkaprav0
/

gpt-transcript-plugin

Sleeping

App Files Files Community

gpt-transcript-plugin / utils.py

arkaprav0

Update utils.py

8af421d almost 2 years ago

raw

history blame contribute delete

4.96 kB

	import re
	import openai
	import concurrent.futures
	import os
	import sys



	def convert_to_dict(text):
	    """Parse transcript lines such as "6.08 seconds - Yeah, the Jack Carr one was pretty fun." into a dictionary.

	    Args:
	        text: A transcript string holding one "<seconds> seconds - <utterance>" entry per line.

	    Returns:
	        A dictionary mapping each timestamp (as a float) to its utterance.
	        Lines that do not follow the format are skipped; when a timestamp
	        appears more than once, the later line replaces the earlier one.
	    """
	    # Whole-number timestamps ("6 seconds - ...") and indented lines are
	    # accepted too, so such entries are not silently dropped.
	    line_pattern = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")

	    result = {}
	    for line in text.splitlines():
	        match = line_pattern.match(line)
	        if match:
	            # Use a separate name so the `text` parameter is not rebound mid-loop.
	            utterance = match.group(2)
	            result[float(match.group(1))] = utterance
	    return result

	def process_dict(text, batch_size=20):
	    """Group transcript entries into batches and join each batch into one string.

	    Args:
	        text: Either a raw transcript string (one "<seconds> seconds - <utterance>"
	            entry per line, parsed with convert_to_dict) or an already-parsed
	            dictionary mapping seconds to text.
	        batch_size: The number of entries to combine into a single string.
	            A value below 1 never triggers a flush, so every entry ends up
	            in one batch.

	    Returns:
	        A new dictionary mapping the seconds of the first entry in each batch
	        to the space-joined text of all entries in that batch. The last batch
	        may hold fewer than batch_size entries.
	    """
	    # Accept a parsed dictionary as well as raw text, so callers that already
	    # ran convert_to_dict do not fail on the second parse.
	    dict_in = text if isinstance(text, dict) else convert_to_dict(text)

	    result = {}
	    batch_key = None
	    batch_lines = []
	    for seconds, line in dict_in.items():
	        if not batch_lines:
	            # The first entry of a batch supplies the key for the whole batch.
	            batch_key = seconds
	        batch_lines.append(line)
	        if len(batch_lines) == batch_size:
	            result[batch_key] = " ".join(batch_lines)
	            batch_lines = []

	    # Flush the trailing, possibly shorter, batch.
	    if batch_lines:
	        result[batch_key] = " ".join(batch_lines)
	    return result


	def call3(chunk):
	    """Ask gpt-3.5-turbo for up to three bullet-point topics from one transcript chunk.

	    Args:
	        chunk: The transcript chunk to summarize; it is passed through str() before sending.

	    Returns:
	        The message content of the first choice in the chat completion response.
	    """
	    system_message = {"role": "system", "content": "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."}
	    user_message = {"role": "user", "content": str(chunk)}

	    completion = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        temperature=0,
	        messages=[system_message, user_message],
	    )
	    first_choice = completion['choices'][0]
	    return first_choice['message']['content']


	def run_gpt_3(dict_in, function=call3):
	    """Apply a function to every value of a dictionary using a thread pool.

	    Args:
	        dict_in: A dictionary mapping keys to values.
	        function: A callable that takes one value and returns a result.

	    Returns:
	        A dictionary with the same keys as dict_in, each mapped to the result
	        of calling function on the corresponding value.
	    """
	    with concurrent.futures.ThreadPoolExecutor() as pool:
	        # Submit every value first so the calls can overlap, then collect
	        # the results in the input's key order.
	        pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
	        outputs = {key: task.result() for key, task in pending.items()}

	    return outputs


	def call4(chunk):
	    """Ask gpt-3.5-turbo to turn timestamped chunk gists into podcast show notes.

	    Args:
	        chunk: The chunked gist of the podcast; it is passed through str() before sending.

	    Returns:
	        The message content of the first choice in the chat completion
	        response. The prompt asks for a Python-dictionary-style layout with
	        hook, summary and chapters, but the reply is returned as plain text
	        without being parsed here.
	    """
	    system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
	Output in a python dictionary format whose structure is this:
	{
	hook: "the hook"
	summary: "summary"
	chapters: {
	timestamp : "chapter"
	timestamp : "chapter"
	}
	}when
	hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
	Ex.One serendipitous relationship led him to start a company & change his life forever.
	summary: Include main talking points and key phrases that will appeal to your
	ideal listener. keep it concise.
	chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.




	"""
	    conversation = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": str(chunk)},
	    ]

	    completion = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        temperature=.3,
	        messages=conversation,
	    )
	    first_choice = completion['choices'][0]
	    return first_choice['message']['content']



	def clean_and_concatenate_dict_values(dict_in):
	    """Clean the values of a dictionary and concatenate them into one string.

	    Each value has surrounding whitespace stripped and the "- " bullet marker
	    removed from the start of its lines.

	    Args:
	        dict_in: A dictionary mapping keys to string values.

	    Returns:
	        A long string containing one "<key>: <cleaned value>" entry per
	        dictionary item, each terminated by a newline. An empty dictionary
	        yields an empty string.
	    """
	    # Only a "- " at the start of a line is a bullet marker; a dash inside a
	    # sentence ("Jack Carr - author") is content and must be kept.
	    bullet_marker = re.compile(r"^([ \t]*)- ", re.MULTILINE)

	    entries = []
	    for key, value in dict_in.items():
	        cleaned = bullet_marker.sub(r"\1", value.strip())
	        entries.append(f"{key}: {cleaned}\n")

	    # Join once instead of growing a string with repeated +=.
	    return "".join(entries)




















	# text = """
	# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
	# 11.32 seconds - He's super nice.
	# 16.56 seconds - I'm really enjoying this book.
	# 21.80 seconds - I can't wait to see what happens next.
	# 27.04 seconds - This is a great read.
	# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
	# """

	# result = convert_to_dict(text)
	# new_result = process_dict(text)  # process_dict parses the raw transcript text itself

	# # print(list(new_result.values())[7])

	# new_result