arkaprav0's picture
Update utils.py
8af421d
import re
import openai
import concurrent.futures
import os
import sys
def convert_to_dict(text):
    """Parse transcript lines such as "6.08 seconds - Yeah, the Jack Carr one was pretty fun." into a dictionary.

    Args:
        text: A string holding one "<seconds> seconds - <sentence>" entry per line,
            where <seconds> is written with a decimal point (e.g. "6.08").
    Returns:
        A dictionary mapping each timestamp (as a float) to the sentence on that line.
        Lines that do not start with that pattern are skipped, and a repeated
        timestamp keeps the sentence from its last occurrence.
    """
    # re.match anchors at the start of each line, so indented or
    # differently formatted lines yield None and are filtered out below.
    line_matches = (
        re.match(r"(\d+\.\d+) seconds - (.*)", line) for line in text.splitlines()
    )
    return {
        float(found.group(1)): found.group(2)
        for found in line_matches
        if found
    }
def process_dict(text, batch_size=20):
    """Parses a transcript and merges its lines into batches of `batch_size` entries.

    Args:
        text: The raw transcript text; it is parsed with convert_to_dict.
        batch_size: The number of consecutive entries to combine into a single string.
    Returns:
        A new dictionary mapping the timestamp of the first entry in each batch to the
        space-joined text of every entry in that batch. The final batch may hold
        fewer than `batch_size` entries.
    """
    entries = convert_to_dict(text)
    batches = {}
    pending = []
    first_timestamp = None
    for timestamp, sentence in entries.items():
        # An empty buffer means this entry opens a new batch.
        if not pending:
            first_timestamp = timestamp
        pending.append(sentence)
        if len(pending) != batch_size:
            continue
        # Batch is full: store it and start over with an empty buffer.
        batches[first_timestamp] = " ".join(pending)
        pending = []
    # Flush whatever is left over as a final, shorter batch.
    if pending:
        batches[first_timestamp] = " ".join(pending)
    return batches
def call3(chunk):
    """Asks gpt-3.5-turbo for the most important topics of one transcript chunk.

    Args:
        chunk: The transcript chunk to summarize; it is converted with str() before sending.
    Returns:
        The message content of the first choice in the chat completion response.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    # Temperature is pinned to 0 for this per-chunk call.
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def run_gpt_3(dict_in, function=call3):
    """Runs `function` on every value of a dictionary using a thread pool and gathers the results.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A function that takes a single value and returns a result.
    Returns:
        A dictionary with the same keys as `dict_in`, each mapped to the result of
        calling `function` on the corresponding value.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Submit everything up front so the calls can overlap, then wait on
        # each future in the input's key order.
        pending = {key: pool.submit(function, value) for key, value in dict_in.items()}
        return {key: future.result() for key, future in pending.items()}
def call4(chunk):
    """Asks gpt-3.5-turbo to turn the timestamped chunk gists into podcast show notes.

    Args:
        chunk: The chunked gist of the podcast; it is converted with str() before sending.
    Returns:
        The message content of the first choice in the chat completion response.
        The prompt requests a python-dictionary-style layout with hook, summary
        and chapters; the reply is returned as-is, without parsing.
    """
    system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
Output in a python dictionary format whose structure is this:
{
hook: "the hook"
summary: "summary"
chapters: {
timestamp : "chapter"
timestamp : "chapter"
}
}when
hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
summary: Include main talking points and key phrases that will appeal to your
ideal listener. keep it concise.
chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.3,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def clean_and_concatenate_dict_values(dict_in):
    """Cleans and concatenates the values of a dictionary before they are sent to call4.

    Args:
        dict_in: A dictionary mapping keys to string values.
    Returns:
        A single string with one "key: value" entry per dictionary item, each
        terminated by a newline. Every value is stripped of surrounding
        whitespace and has all occurrences of "- " removed.
    """
    entries = []
    for key, value in dict_in.items():
        # Removes every "- " occurrence, not only leading bullet markers.
        cleaned = value.strip().replace("- ", "")
        entries.append(f"{key}: {cleaned}\n")
    # Join once instead of growing the string with += on every iteration.
    return "".join(entries)
# text = """
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
# 11.32 seconds - He's super nice.
# 16.56 seconds - I'm really enjoying this book.
# 21.80 seconds - I can't wait to see what happens next.
# 27.04 seconds - This is a great read.
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
# """
# result = convert_to_dict(text)
# new_result = process_dict(text)  # process_dict takes the raw text and parses it itself
# # print(list(new_result.values())[7])
# new_result