# transcript2notes / meeting_notes.py
# fracapuano's picture
# add files via upload
# 15253f2 verified
from openai import OpenAI
from dotenv import load_dotenv
from typing import List, Optional
from tqdm import tqdm
# Load variables from a .env file into the environment before the client is
# constructed (presumably this is where the OpenAI API key comes from -- confirm).
load_dotenv()
# Module-level OpenAI client shared by every function in this file.
client = OpenAI()
def extract_topics(meeting_transcript):
    """Ask the chat model for the main topics discussed in a meeting.

    Args:
        meeting_transcript: Full text of the meeting transcript; it is
            interpolated verbatim into the prompt.

    Returns:
        A list of topic names (strings), in the order the model listed them.
        The list is empty when the model returns no usable text.
    """
    prompt_text = f"""
## Transcript
<transcript>
{meeting_transcript}
</transcript>
You are a topic extractor whose main task is to identify and list the top 5 most important topics discussed
in a meeting, to whom you have access thanks to the provided meeting transcript.
Provided the transcript available under the <transcript> tags, analyze it and extract the main topics discussed.
Format your output as a list of topics I could iterate on.
An ideal output for a meeting covering budgeting budget concers, project handling and staffing looks like
[Budgeting, Project Deadlines, Staffing, New Policies, Client Feedback]
"""
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
        max_tokens=100,  # short list of topics discussed
        temperature=0.0,
        stop=["\n", "]"],  # Stops to help ensure the list format is respected
    )
    # `content` may be None, and because "]" is a stop sequence the closing
    # bracket is normally missing from the returned text.
    raw = response.choices[0].message.content or ""

    # Ignore any preamble the model put before the opening bracket.
    _, bracket, after = raw.partition("[")
    listing = after if bracket else raw

    # Tolerate irregular spacing, stray brackets and quoted items; drop blanks
    # so an empty reply gives [] rather than [''].
    cleaned = (
        piece.strip().strip("[]").strip().strip("'\"").strip()
        for piece in listing.split(",")
    )
    return [topic for topic in cleaned if topic]
def generate_bullet_point_summary(topic, meeting_transcript):
    """Summarise, as bullet points, what the transcript says about one topic.

    Args:
        topic: The topic the summary should focus on.
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The model's reply with surrounding whitespace removed. Generation is
        capped at 400 tokens and stops at the first blank line.
    """
    user_prompt = f"""
<topic>
{topic}
</topic>
<transcript>
{meeting_transcript}
</transcript>
You are an AI assistant tasked with assisting in summarizing meeting discussions.
Below is the transcript of a meeting, and a specific topic to focus on.
Please provide a summary of all the discussions related to this topic in bullet points. Be very
concise and to the point. Each bullet point must contain one concept only.
"""
    completion = client.chat.completions.create(
        messages=[dict(role="user", content=user_prompt)],
        model="gpt-4-turbo",
        # A double newline marks the end of the bullet list.
        stop=["\n\n"],
        # Raise this if more detailed summaries are needed.
        max_tokens=400,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
def summarize_topics(topics, meeting_transcript):
    """Build a bullet-point summary for every topic.

    Args:
        topics: Iterable of topic names; ``None`` is treated as no topics.
        meeting_transcript: Full transcript text, passed to the summariser
            for each topic.

    Returns:
        A dict mapping each topic to the summary returned by
        ``generate_bullet_point_summary``, in first-seen order. Blank and
        repeated topics are skipped.
    """
    summaries = {}
    for topic in topics or ():
        # A blank topic has nothing to summarise, and a repeated topic would
        # only overwrite its own entry -- neither is worth another model call
        # over the whole transcript.
        if not str(topic).strip() or topic in summaries:
            continue
        summaries[topic] = generate_bullet_point_summary(topic, meeting_transcript)
    return summaries
def extract_actionable_items(meeting_transcript):
    """Ask the chat model for the action items assigned to each speaker.

    Args:
        meeting_transcript: Full text of the meeting transcript.

    Returns:
        The text the model wrote inside its ``<result>`` tags, stripped of
        surrounding whitespace. If the reply has no ``<result>`` tag the
        whole reply is returned; an empty reply gives ``""``.
    """
    prompt_text = f"""
Please carefully analyze the following meeting transcript, which will be provided between XML tags:
<meeting_transcript>
{meeting_transcript}
</meeting_transcript>
First, identify each unique speaker who participated in the meeting.
Then, for each speaker you identified, carefully extract any concrete action items, tasks, or next
steps that were assigned to them during the meeting. Use the full context of the meeting to
determine what the key next steps are for each person.
Format your response as a bulleted list, with each speaker's full name followed by a sublist of the
specific action items you identified for them. Here is an example of the desired format:
## John Smith:
- Follow up with the client by next Wednesday.
- Prepare a detailed budget proposal for the next meeting.
- Jane Doe:
- Coordinate with the marketing team to draft the new campaign outline.
- Send updated staffing requirements to HR by Friday.
Omit any speakers for whom no clear action items or next steps were specified in the meeting. Focus
on extracting the most concrete and actionable items for each speaker.
Write your full list of speakers and action items inside <result> tags.
If you are unable to identify the speakers' names, please write "Speaker 1", "Speaker 2", etc.
"""
    # No stop sequence: the requested answer spans several speakers and is
    # wrapped in <result> tags, so cutting at the first blank line would
    # truncate it (often before the result even starts).
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
    )
    raw = response.choices[0].message.content or ""  # content may be None

    # Keep only what sits inside <result>...</result>; fall back to the full
    # reply when the model did not emit the opening tag.
    _, opening_tag, tail = raw.partition("<result>")
    body = tail if opening_tag else raw
    action_items = body.partition("</result>")[0].strip()
    return action_items
def cleanup_meeting_notes(meeting_notes, speakers_list=None):
    """Have the chat model reformat draft notes into the markdown template.

    Args:
        meeting_notes: Draft notes text to be reformatted.
        speakers_list: Optional speaker names; when falsy, the prompt states
            that no speakers list was provided.

    Returns:
        The content of the model's first reply, unmodified.
    """
    # Text shown inside the <speakers_list> tag of the prompt.
    speakers_block = speakers_list or "No speakers list provided"
    editor_prompt = f"""
<meeting_notes_draft>
{meeting_notes}
</meeting_notes_draft>
<speakers_list>
{speakers_block}
</speakers_list>
You are a meeting notes editor who has been tasked with cleaning up the draft of a meeting notes document.
You must not modify the content you receive in any way or form, your task is simply to reformat the text to make it adhere to
the following guidelines:
- Production-ready meeting notes are always formatted in markdown. Ensure that the text is properly formatted in markdown.
- Production-ready meeting notes always have 3 sections: "Speakers", "Meeting Summary", "Action Items". These sections are always H1 in markdown (#Speakers, #Meeting Summary, #Action Items).
- Production-ready meeting notes always have a horizontal rule (---) between each section.
- Production-ready meeting notes always present the topics discussed in the #Meeting Summary section, with each topic being a toggle subheading (> ##Topic).
- Production-ready meeting notes always present the bullet points under each topic as markdown bullet points points.
- Production-ready meeting notes always have each speaker's name in bold.
- Production-ready meeting notes always have the action items in a bulleted list.
- Production-ready meeting notes always have the action items grouped by the speaker who is responsible for them.
- Production-ready meetings always presents speakers mapped to the name in the <speakers_list> tag, if available, in the same order. This means that for ["Francesco", "Carlo", "Antonio"]
you would have that "Francesco" is the "Speaker 0", "Carlo" is the "Speaker 1", and "Antonio" is the "Speaker 2".
Your output must exactly match the format described above. You must not modify the content of the meeting notes in any way, only the formatting. You will be
penalized if you change the content of the meeting notes.
An example template for the meeting notes is as follows:
# Speakers
- **Speaker 0**
- **Speaker 1**
...
---
# Meeting Summary
> ## Topic 1
- Bullet point 1
- Bullet point 2
...
> ## Topic 2
- Bullet point 1
- Bullet point 2
...
---
# Action Items
## <Speaker 0's name> to own
- Action item 1
- Action item 2
## <Speaker 1's name> to own
- Action item 1
- Action item 2
"""
    completion = client.chat.completions.create(
        messages=[dict(role="user", content=editor_prompt)],
        model="gpt-4-turbo",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def transcript_to_notes(meeting_transcript: str, speakers_list: Optional[List[str]] = None) -> str:
    """Converts a meeting transcript into formatted meeting notes.

    Runs four stages in order: topic extraction, per-topic summaries,
    action-item extraction, and a final formatting pass over the draft.

    Args:
        meeting_transcript (str): The text of the meeting transcript
        speakers_list (Optional[List[str]]): A list of speakers in the meeting

    Returns:
        str: The formatted meeting notes
    """
    # One tick per stage, including the final cleanup call. The context
    # manager closes the bar even when a stage raises.
    with tqdm(total=4) as pbar:
        topics = extract_topics(meeting_transcript)
        pbar.update(1)

        by_topic_summaries = summarize_topics(topics, meeting_transcript)
        pbar.update(1)

        actions_by_speaker = extract_actionable_items(meeting_transcript)
        pbar.update(1)

        # Plain-text draft handed to the formatting pass.
        draft_notes = f"""
topics: {topics}
summaries: {by_topic_summaries}
actions: {actions_by_speaker}
"""
        meeting_notes = cleanup_meeting_notes(draft_notes, speakers_list)
        pbar.update(1)

    return meeting_notes
# Example usage: read a transcript from disk, generate notes, write them as markdown.
if __name__ == "__main__":
    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding.
    with open("tanguy-off-boarding-meeting.txt", "r", encoding="utf-8") as file:
        meeting_transcript = file.read()

    # Names are mapped to "Speaker 0", "Speaker 1", ... in this order.
    speakers_list = ["Tanguy", "Francesco"]
    notes = transcript_to_notes(meeting_transcript, speakers_list)

    with open("meeting_notes.md", "w", encoding="utf-8") as file:
        file.write(notes)

    print("Meeting notes generated successfully!")