# Hugging Face Spaces page header captured with the file (status: Sleeping).
import re | |
import openai | |
import concurrent.futures | |
import os | |
import sys | |
def convert_to_dict(text):
    """Parse "<seconds> seconds - <sentence>" transcript lines into a dictionary.

    Each line such as "6.08 seconds - Yeah, the Jack Carr one was pretty fun."
    contributes one entry. Lines that do not follow the format are ignored.
    The timestamp may be written with or without a fractional part
    ("6 seconds - ..." is accepted), and leading whitespace is tolerated.

    Args:
        text: The transcript, one timestamped sentence per line.

    Returns:
        A dictionary mapping the timestamp in seconds (float) to the sentence.
        If the same timestamp occurs more than once, the last line wins.
    """
    # Compiled once, outside the loop; the fractional part is optional.
    line_pattern = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")
    result = {}
    for line in text.splitlines():
        match = line_pattern.match(line)
        if match:
            # Store directly instead of rebinding the `text` parameter.
            result[float(match.group(1))] = match.group(2)
    return result
def process_dict(text, batch_size=20):
    """Group timestamped transcript sentences into batches of joined text.

    Args:
        text: Either the raw transcript text (parsed with convert_to_dict)
            or an already-parsed dictionary mapping seconds to text.
        batch_size: The number of consecutive entries to combine into a
            single string. Must be at least 1.

    Returns:
        A new dictionary mapping the seconds of the first entry in each batch
        to the space-joined text of all entries in that batch. The final batch
        may hold fewer than batch_size entries.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Accept a pre-parsed mapping as well as raw text, as the docstring
    # of the original version promised.
    entries = text if isinstance(text, dict) else convert_to_dict(text)
    items = list(entries.items())

    result = {}
    for start in range(0, len(items), batch_size):
        batch = items[start:start + batch_size]
        first_key = batch[0][0]
        result[first_key] = " ".join(sentence for _, sentence in batch)
    return result
def call3(chunk):
    """Ask gpt-3.5-turbo for the key topics of one podcast transcript chunk.

    The chunk is converted with str() and sent as the user message, with a
    system prompt requesting up to three concise bullet points. The request
    uses temperature 0.

    Args:
        chunk: The transcript chunk to summarize.

    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = "You are a podcast chunk summarizer. You will be given a random chunk from a podcast transcript. you will return 3 most important topics (or less if necessary) from that chunk as bulleted point as output. Make the bullet points as concise and informative as possible."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def run_gpt_3(dict_in, function=call3):
    """Apply a function to every dictionary value using a thread pool.

    Every value is submitted to a ThreadPoolExecutor as its own task, and the
    call blocks until each task has produced its result.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A callable taking one value and returning its result.

    Returns:
        A dictionary with the same keys as dict_in, each mapped to the result
        of calling function on the corresponding value.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Submit everything first so the tasks can run concurrently,
        # then collect the results in the input order.
        tasks = {key: pool.submit(function, value) for key, value in dict_in.items()}
        return {key: task.result() for key, task in tasks.items()}
def call4(chunk):
    """Ask gpt-3.5-turbo for podcast show notes built from chunk summaries.

    The chunk is converted with str() and sent as the user message. The system
    prompt asks for a hook, a summary and timestamped chapters in a python
    dictionary format. The request uses temperature 0.3. The reply is returned
    as-is; it is not parsed or validated here.

    Args:
        chunk: The timestamped chunk summaries to turn into show notes.

    Returns:
        The message content of the first choice in the API response.
    """
    # NOTE(review): leading whitespace inside this literal could not be
    # recovered from the reviewed copy - confirm against the deployed file.
    system_prompt = """You are a podcast summarizer. You will be given the chunked gist of a long podcast, each chunk will have it's timestamp in seconds.
Output in a python dictionary format whose structure is this:
{
hook: "the hook"
summary: "summary"
chapters: {
timestamp : "chapter"
timestamp : "chapter"
}
}when
hook: (Begin your podcast show notes with a gripping quote, anecdote, or question.)
Ex.One serendipitous relationship led him to start a company & change his life forever.
summary: Include main talking points and key phrases that will appeal to your
ideal listener. keep it concise.
chapters: analyze the whole input, and extract only the most important topics. Remove as much filler and unnecessary info and details as possible.
"""
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(chunk)},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=.3,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def clean_and_concatenate_dict_values(dict_in):
    """Clean the values of a dictionary and join them into one string.

    Each value is stripped of surrounding whitespace and every occurrence of
    "- " (the bullet marker) is removed, wherever it appears in the value.
    The original docstring describes this as the step "before sending to 4".

    Args:
        dict_in: A dictionary mapping keys to string values.

    Returns:
        A single string with one "key: value" entry per dictionary item, each
        terminated by a newline. An empty dictionary yields an empty string.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    entries = []
    for key, value in dict_in.items():
        cleaned = value.strip().replace("- ", "")
        entries.append(f"{key}: {cleaned}\n")
    return "".join(entries)
# text = """ | |
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun. | |
# 11.32 seconds - He's super nice. | |
# 16.56 seconds - I'm really enjoying this book. | |
# 21.80 seconds - I can't wait to see what happens next. | |
# 27.04 seconds - This is a great read. | |
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers. | |
# """ | |
# result = convert_to_dict(text)
# new_result = process_dict(text)  # process_dict parses the raw text itself
# # print(list(new_result.values())[7]) | |
# new_result |