# Hosting-page status banner ("Spaces: Sleeping") captured with the file; not part of the program.
import re
from typing import List, Optional

from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm
# Module-level setup. load_dotenv() runs before the client is constructed;
# presumably it loads credentials from a .env file into the environment for
# OpenAI() to pick up -- confirm against the deployment configuration.
load_dotenv()

# Shared client used by every model call in this module.
client = OpenAI()
def extract_topics(meeting_transcript):
    """Ask the model for the main topics of a meeting and return them as a list.

    The prompt requests the five most important topics, formatted as a
    bracketed, comma-separated list on a single line.

    Args:
        meeting_transcript: Text of the meeting transcript; it is interpolated
            into the prompt between <transcript> tags.

    Returns:
        A list of topic strings parsed from the model's reply. The list is
        empty when the reply is empty or contains no topic text.
    """
    # NOTE(review): the prompt says "list of skills" where "topics" is
    # presumably meant, and contains the typo "concers". The wording is left
    # untouched here so the model's behavior does not change silently.
    prompt_text = f"""
## Transcript
<transcript>
{meeting_transcript}
</transcript>
You are a topic extractor whose main task is to identify and list the top 5 most important topics discussed
in a meeting, to whom you have access thanks to the provided meeting transcript.
Provided the transcript available under the <transcript> tags, analyze it and extract the main topics discussed.
Format your output as a list of skills I could iterate on.
An ideal output for a meeting covering budgeting budget concers, project handling and staffing looks like
[Budgeting, Project Deadlines, Staffing, New Policies, Client Feedback]
"""
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
        max_tokens=100,  # short list of topics discussed
        temperature=0.0,
        # Generation stops at the first newline or closing bracket, so the
        # reply is normally a single line without the trailing "]".
        stop=["\n", "]"],
    )

    # The API types `content` as optional; treat a missing reply as empty.
    raw_reply = response.choices[0].message.content or ""

    # Strip surrounding whitespace first, otherwise a leading space or newline
    # would shield the opening bracket from the bracket strip.
    bracket_free = raw_reply.strip().strip("][")

    # Split on bare commas and trim each piece so the result does not depend
    # on the exact ", " spacing; also drop quotes the model may wrap around
    # items, and skip empty pieces so an empty reply yields [] instead of [''].
    candidates = (piece.strip().strip("'\"") for piece in bracket_free.split(","))
    return [topic for topic in candidates if topic]
def generate_bullet_point_summary(topic, meeting_transcript):
    """Summarize what the transcript says about one topic, as bullet points.

    Args:
        topic: The topic to focus on; interpolated between <topic> tags.
        meeting_transcript: Text of the meeting transcript; interpolated
            between <transcript> tags.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    prompt = f"""
<topic>
{topic}
</topic>
<transcript>
{meeting_transcript}
</transcript>
You are an AI assistant tasked with assisting in summarizing meeting discussions.
Below is the transcript of a meeting, and a specific topic to focus on.
Please provide a summary of all the discussions related to this topic in bullet points. Be very
concise and to the point. Each bullet point must contain one concept only.
"""
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[user_message],
        max_tokens=400,  # raise this if more detailed summaries are needed
        stop=["\n\n"],  # generation halts at the first blank line
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
def summarize_topics(topics, meeting_transcript):
    """Build a mapping from each topic to its bullet-point summary.

    Args:
        topics: Iterable of topic names; each one triggers a call to
            generate_bullet_point_summary.
        meeting_transcript: Text of the meeting transcript, passed through
            unchanged to every summary call.

    Returns:
        A dict keyed by topic, in iteration order, whose values are the
        summaries returned for that topic.
    """
    return {
        name: generate_bullet_point_summary(name, meeting_transcript)
        for name in topics
    }
def extract_actionable_items(meeting_transcript):
    """Ask the model for per-speaker action items found in a transcript.

    Args:
        meeting_transcript: Text of the meeting transcript; interpolated into
            the prompt between <meeting_transcript> tags.

    Returns:
        The action-item list as a string. When the reply contains a <result>
        section (as the prompt requests), only its inner text is returned;
        otherwise the whole reply is returned. Surrounding whitespace is
        stripped in both cases.
    """
    prompt_text = f"""
Please carefully analyze the following meeting transcript, which will be provided between XML tags:
<meeting_transcript>
{meeting_transcript}
</meeting_transcript>
First, identify each unique speaker who participated in the meeting.
Then, for each speaker you identified, carefully extract any concrete action items, tasks, or next
steps that were assigned to them during the meeting. Use the full context of the meeting to
determine what the key next steps are for each person.
Format your response as a bulleted list, with each speaker's full name followed by a sublist of the
specific action items you identified for them. Here is an example of the desired format:
## John Smith:
- Follow up with the client by next Wednesday.
- Prepare a detailed budget proposal for the next meeting.
- Jane Doe:
- Coordinate with the marketing team to draft the new campaign outline.
- Send updated staffing requirements to HR by Friday.
Omit any speakers for whom no clear action items or next steps were specified in the meeting. Focus
on extracting the most concrete and actionable items for each speaker.
Write your full list of speakers and action items inside <result> tags.
If you are unable to identify the speakers' names, please write "Speaker 1", "Speaker 2", etc.
"""
    # No stop sequence: the prompt asks for a multi-speaker list, and stopping
    # at the first blank line would cut the reply off after the first section
    # whenever the model separates speakers with an empty line.
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": prompt_text}],
    )

    # The API types `content` as optional; treat a missing reply as empty.
    reply = (response.choices[0].message.content or "").strip()

    # Unwrap the <result> section the prompt asks for. The closing tag is
    # optional in the pattern so a reply truncated before </result> still
    # yields the text after the opening tag.
    tagged = re.search(r"<result>(.*?)(?:</result>|\Z)", reply, flags=re.DOTALL)
    if tagged is None:
        return reply
    return tagged.group(1).strip()
def cleanup_meeting_notes(meeting_notes, speakers_list=None):
    """Have the model reformat draft meeting notes into the markdown template.

    Args:
        meeting_notes: Draft notes text; interpolated between
            <meeting_notes_draft> tags.
        speakers_list: Optional collection of speaker names. When it is falsy
            (None or empty) the prompt states that no list was provided.

    Returns:
        The content of the model's first reply, returned as-is.
    """
    # Resolve the speakers placeholder up front; a falsy list falls back to
    # the fixed notice string.
    speakers_section = speakers_list or "No speakers list provided"

    prompt = f"""
<meeting_notes_draft>
{meeting_notes}
</meeting_notes_draft>
<speakers_list>
{speakers_section}
</speakers_list>
You are a meeting notes editor who has been tasked with cleaning up the draft of a meeting notes document.
You must not modify the content you receive in any way or form, your task is simply to reformat the text to make it adhere to
the following guidelines:
- Production-ready meeting notes are always formatted in markdown. Ensure that the text is properly formatted in markdown.
- Production-ready meeting notes always have 3 sections: "Speakers", "Meeting Summary", "Action Items". These sections are always H1 in markdown (#Speakers, #Meeting Summary, #Action Items).
- Production-ready meeting notes always have a horizontal rule (---) between each section.
- Production-ready meeting notes always present the topics discussed in the #Meeting Summary section, with each topic being a toggle subheading (> ##Topic).
- Production-ready meeting notes always present the bullet points under each topic as markdown bullet points points.
- Production-ready meeting notes always have each speaker's name in bold.
- Production-ready meeting notes always have the action items in a bulleted list.
- Production-ready meeting notes always have the action items grouped by the speaker who is responsible for them.
- Production-ready meetings always presents speakers mapped to the name in the <speakers_list> tag, if available, in the same order. This means that for ["Francesco", "Carlo", "Antonio"]
you would have that "Francesco" is the "Speaker 0", "Carlo" is the "Speaker 1", and "Antonio" is the "Speaker 2".
Your output must exactly match the format described above. You must not modify the content of the meeting notes in any way, only the formatting. You will be
penalized if you change the content of the meeting notes.
An example template for the meeting notes is as follows:
# Speakers
- **Speaker 0**
- **Speaker 1**
...
---
# Meeting Summary
> ## Topic 1
- Bullet point 1
- Bullet point 2
...
> ## Topic 2
- Bullet point 1
- Bullet point 2
...
---
# Action Items
## <Speaker 0's name> to own
- Action item 1
- Action item 2
## <Speaker 1's name> to own
- Action item 1
- Action item 2
"""
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def transcript_to_notes(meeting_transcript: str, speakers_list: Optional[List[str]] = None) -> str:
    """Convert a meeting transcript into formatted meeting notes.

    Runs four model-backed steps in order: topic extraction, per-topic
    summaries, action-item extraction, and a final formatting pass over the
    combined draft. Progress is shown with a tqdm bar.

    Args:
        meeting_transcript (str): The text of the meeting transcript.
        speakers_list (Optional[List[str]]): A list of speakers in the
            meeting; forwarded to the formatting step.

    Returns:
        str: The formatted meeting notes.
    """
    # The context manager closes the bar even when a step raises; total=4 so
    # the bar only reaches 100% after the final formatting call has finished.
    with tqdm(total=4) as pbar:
        topics = extract_topics(meeting_transcript)
        pbar.update(1)

        by_topic_summaries = summarize_topics(topics, meeting_transcript)
        pbar.update(1)

        actions_by_speaker = extract_actionable_items(meeting_transcript)
        pbar.update(1)

        # Plain-text draft handed to the formatter; the formatting step is
        # instructed to restructure it without changing its content.
        draft_notes = f"""
topics: {topics}
summaries: {by_topic_summaries}
actions: {actions_by_speaker}
"""
        meeting_notes = cleanup_meeting_notes(draft_notes, speakers_list)
        pbar.update(1)

    return meeting_notes
# Example usage: read a transcript from disk, generate notes, write them out.
if __name__ == "__main__":
    # Explicit UTF-8 so reading the transcript and writing the model's output
    # do not depend on the platform's default encoding.
    with open("tanguy-off-boarding-meeting.txt", "r", encoding="utf-8") as transcript_file:
        meeting_transcript = transcript_file.read()

    speakers_list = ["Tanguy", "Francesco"]
    notes = transcript_to_notes(meeting_transcript, speakers_list)

    with open("meeting_notes.md", "w", encoding="utf-8") as notes_file:
        notes_file.write(notes)
    print("Meeting notes generated successfully!")