"""Helpers for turning a timestamped podcast transcript into show notes.

Pipeline: parse "<seconds> seconds - <text>" lines, group them into batches,
summarize every batch with the chat model, then merge the per-batch summaries
into one prompt for the final show-notes request.
"""

import re
import concurrent.futures
import os
import sys

try:
    import openai
except ImportError:
    # Only call3/call4 need the SDK; keep the pure-Python transcript helpers
    # importable without it. _chat_completion reports the missing package.
    openai = None


# Optional leading whitespace, a timestamp with an optional fractional part,
# the literal " seconds - " separator, then the spoken text.
_TRANSCRIPT_LINE = re.compile(r"\s*(\d+(?:\.\d+)?) seconds - (.*)")

_CHUNK_SYSTEM_PROMPT = (
    "You are a podcast chunk summarizer. You will be given a random chunk "
    "from a podcast transcript. you will return 3 most important topics (or "
    "less if necessary) from that chunk as bulleted point as output. \n"
    "Make the bullet points as concise and informative as possible."
)

_SUMMARY_SYSTEM_PROMPT = (
    "You are a podcast summarizer. You will be given the chunked gist of a "
    "long podcast, each chunk will have it's timestamp in seconds. Output in "
    "a python dictionary format whose structure is this: "
    '{ hook: "the hook" summary: "summary" chapters: '
    '{ timestamp : "chapter" timestamp : "chapter" } }'
    "when hook: (Begin your podcast show notes with a gripping quote, "
    "anecdote, or question.) Ex.One serendipitous relationship led him to "
    "start a company & change his life forever. summary: Include main "
    "talking points and key phrases that will appeal to your ideal listener. "
    "keep it concise. chapters: analyze the whole input, and extract only "
    "the most important topics. Remove as much filler and unnecessary info "
    "and details as possible. "
)


def convert_to_dict(text):
    """Parse transcript lines of the form "6.08 seconds - Some text.".

    Lines that do not match the format are ignored. If the same timestamp
    occurs more than once, the last occurrence wins.

    Args:
        text: A string with one "<seconds> seconds - <text>" entry per line.

    Returns:
        A dictionary mapping the seconds (as float) to the text, in the order
        the lines appear.
    """
    result = {}
    for line in text.splitlines():
        match = _TRANSCRIPT_LINE.match(line)
        if match:
            result[float(match.group(1))] = match.group(2)
    return result


def process_dict(text, batch_size=20):
    """Group transcript entries into batches and join each batch's text.

    Args:
        text: Either the raw transcript string (parsed with convert_to_dict)
            or an already-parsed dictionary mapping seconds to text.
        batch_size: The number of entries to combine into a single string.

    Returns:
        A new dictionary mapping the seconds of the first entry in each batch
        to the space-joined texts of that batch. The last batch may hold
        fewer than batch_size entries.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    entries = convert_to_dict(text) if isinstance(text, str) else text
    items = list(entries.items())

    result = {}
    for start in range(0, len(items), batch_size):
        batch = items[start:start + batch_size]
        first_seconds = batch[0][0]
        result[first_seconds] = " ".join(value for _, value in batch)
    return result


def _chat_completion(system_prompt, chunk, temperature):
    """Send one system + user exchange to gpt-3.5-turbo; return the reply text."""
    if openai is None:
        raise ImportError(
            "the 'openai' package is required to call the chat completion API"
        )
    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface -- confirm the pinned openai version before upgrading.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": str(chunk)},
        ],
    )
    return response['choices'][0]['message']['content']


def call3(chunk):
    """Ask the model for up to three bullet-point topics of a transcript chunk.

    Args:
        chunk: The transcript chunk; it is converted with str() before sending.

    Returns:
        The content of the first choice in the model response.
    """
    return _chat_completion(_CHUNK_SYSTEM_PROMPT, chunk, temperature=0)


def run_gpt_3(dict_in, function=call3, max_workers=None):
    """Apply a function to every value of a dictionary using a thread pool.

    All values are submitted at once and the call blocks until every result
    is available. An exception raised by the function propagates to the
    caller.

    Args:
        dict_in: A dictionary mapping keys to values.
        function: A function that takes a value as input and returns a value
            as output.
        max_workers: Optional upper bound on the number of worker threads;
            None keeps the ThreadPoolExecutor default.

    Returns:
        A dictionary mapping the same keys as the input dictionary to the
        results of calling the function on each value.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(function, value) for value in dict_in.values()]
        results = [future.result() for future in futures]
    return dict(zip(dict_in.keys(), results))


def call4(chunk):
    """Ask the model for the final show notes (hook, summary, chapters).

    Args:
        chunk: The concatenated per-chunk gists with their timestamps; it is
            converted with str() before sending.

    Returns:
        The content of the first choice in the model response.
    """
    return _chat_completion(_SUMMARY_SYSTEM_PROMPT, chunk, temperature=.3)


def clean_and_concatenate_dict_values(dict_in):
    """Clean and concatenate the values of a dictionary before sending to call4.

    Each value is stripped of surrounding whitespace and every "- " bullet
    marker is removed.

    Args:
        dict_in: A dictionary mapping keys to string values.

    Returns:
        A long string containing the concatenated values of the dictionary,
        one "key: value" entry per input item, each terminated by a newline.
    """
    return "".join(
        f"{key}: {value.strip().replace('- ', '')}\n"
        for key, value in dict_in.items()
    )


# text = """
# 6.08 seconds - Yeah, the Jack Carr one was pretty fun.
# 11.32 seconds - He's super nice.
# 16.56 seconds - I'm really enjoying this book.
# 21.80 seconds - I can't wait to see what happens next.
# 27.04 seconds - This is a great read.
# 32.28 seconds - I highly recommend it to anyone who enjoys thrillers.
# """
# result = convert_to_dict(text)
# new_result = process_dict(result)
# # print(list(new_result.values())[7])
# new_result