AhmadMustafa committed on
Commit
40ddc76
·
1 Parent(s): 21cd05d
Files changed (3) hide show
  1. app.py +56 -0
  2. prompts.py +107 -22
  3. utils.py +18 -0
app.py CHANGED
@@ -21,6 +21,7 @@ from utils import openai_tools as tools
21
  client = OpenAI()
22
 
23
 
 
24
  def get_initial_analysis(
25
  transcript_processor: TranscriptProcessor, cid, rsid, origin, ct, uid
26
  ) -> Generator[str, None, None]:
@@ -414,6 +415,61 @@ def chat(
414
  messages.append(function_call_result_message)
415
  yield error_msg
416
  return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  break # Exit streaming loop if tool calls detected
418
 
419
  if not tool_calls_detected and chunk.choices[0].delta.content is not None:
 
21
  client = OpenAI()
22
 
23
 
24
+
25
  def get_initial_analysis(
26
  transcript_processor: TranscriptProcessor, cid, rsid, origin, ct, uid
27
  ) -> Generator[str, None, None]:
 
415
  messages.append(function_call_result_message)
416
  yield error_msg
417
  return
418
+
419
+ elif tool_call.function.name == "find_user_suggested_timestamps":
420
+ args = eval(tool_call.function.arguments)
421
+ user_suggestions = args.get("user_suggestions", "")
422
+
423
+ try:
424
+ # Get data needed for the functions
425
+ speakers = ", ".join(speaker_mapping.values())
426
+ transcript = transcript_processor.get_transcript()
427
+
428
+ # Import the functions
429
+ from prompts import find_user_suggested_timestamps, format_timestamps_to_rollai_links
430
+
431
+ # Use o4-mini to find accurate timestamps
432
+ timestamp_data = find_user_suggested_timestamps(
433
+ speakers=speakers,
434
+ transcript=transcript,
435
+ user_suggestions=user_suggestions,
436
+ client=client
437
+ )
438
+
439
+ # Use GPT-4o to format to roll.ai links
440
+ formatted_links = format_timestamps_to_rollai_links(
441
+ timestamp_data=timestamp_data,
442
+ cid=cid,
443
+ rsid=rsid,
444
+ origin=origin,
445
+ uid=uid,
446
+ link_start=link_start,
447
+ client=client
448
+ )
449
+
450
+ function_call_result_message = {
451
+ "role": "tool",
452
+ "content": formatted_links,
453
+ "name": tool_call.function.name,
454
+ "tool_call_id": tool_call.id,
455
+ }
456
+ messages.append(function_call_result_message)
457
+
458
+ yield formatted_links
459
+ return
460
+
461
+ except Exception as e:
462
+ error_msg = f"Error processing user suggestions: {str(e)}"
463
+ function_call_result_message = {
464
+ "role": "tool",
465
+ "content": error_msg,
466
+ "name": tool_call.function.name,
467
+ "tool_call_id": tool_call.id,
468
+ }
469
+ messages.append(function_call_result_message)
470
+ yield error_msg
471
+ return
472
+
473
  break # Exit streaming loop if tool calls detected
474
 
475
  if not tool_calls_detected and chunk.choices[0].delta.content is not None:
prompts.py CHANGED
@@ -1,5 +1,7 @@
1
  from typing import Dict
2
 
 
 
3
 
4
  def get_street_interview_prompt(
5
  transcript: str, uid: str, rsid: str, link_start: str
@@ -124,8 +126,6 @@ Call Type is {ct}.
124
  Speakers: {", ".join(speaker_mapping.values())}
125
  Transcript: {transcript}
126
 
127
- IMPORTANT: If the user suggests clips along with timestamps, the clips are correct but the timings can differ slightly from the actual transcript because the user was in the audience and does not have exact timestamps. You need to find the correct timestamp in the transcript and verify it instead of relying on the user's timestamps.
128
-
129
  If a user asks timestamps for a specific topic or things, find the start time and end time of that specific topic and return answer in the format:
130
  Answers and URLs should be formated as follows:
131
  [Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{772}}&uid={{uid}})
@@ -152,34 +152,119 @@ Provide the exact timestamp where the person begins their introduction, typicall
152
  If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
153
  If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
154
 
155
- IMPORTANT TIMESTAMP VERIFICATION INSTRUCTIONS:
156
- When a user provides clips with timestamps, the starting timestamp may be slightly inaccurate since they were noted by someone in the audience without access to exact timing data. You MUST verify and correct these timestamps by searching the actual transcript.
157
- For example, user says 15:25 "speaker 1 talks about mark zuckerberg" but the actual transcript shows that sentence starts at 15:30. You must return the corrected timestamp as 15:30.
158
- Required Process for Each User-Provided Timestamp:
159
- For every timestamp the user suggests, you must:
 
 
 
 
160
 
161
- Use <THINKING> tags to analyze each timestamp
162
- Inside the thinking tags:
 
 
 
163
 
164
- Quote the starting sentence the user claims begins at their suggested timestamp
165
- Search the transcript for where that sentence actually appears
166
- Compare the user's timestamp with the actual timestamp in the transcript
167
- Note any discrepancies and identify the correct start time
168
 
 
 
169
 
170
- Return your final answer with the corrected timestamps and properly formatted links
 
171
 
172
- Example format:
173
- <THINKING>
174
- User claims the clip starts at 15:25 with discussion on Mark Zuckerberg.
175
- Let me search the transcript for this sentence. The discussion on Mark Zuckerberg actually starts at 15:47 for 35 seconds.
176
- User claims the clip starts at 12:00 with discussion on Dinner.
177
- Let me search the transcript for this sentence. The discussion on Dinner actually starts at 11:30 for 20seconds.
178
- </THINKING>
179
 
180
- [Continue with your response using the verified timestamp in the correct URL format as shown above.]
 
 
 
 
 
 
181
  """
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def remove_unwanted_prompt(number_of_speakers: int):
185
  if number_of_speakers == 1:
 
1
  from typing import Dict
2
 
3
+ import openai
4
+
5
 
6
  def get_street_interview_prompt(
7
  transcript: str, uid: str, rsid: str, link_start: str
 
126
  Speakers: {", ".join(speaker_mapping.values())}
127
  Transcript: {transcript}
128
 
 
 
129
  If a user asks timestamps for a specific topic or things, find the start time and end time of that specific topic and return answer in the format:
130
  Answers and URLs should be formated as follows:
131
  [Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{772}}&uid={{uid}})
 
152
  If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
153
  If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
154
 
155
+ For user-suggested clips with timestamps, use the find_user_suggested_timestamps function to get accurate timestamps, then format them with format_timestamps_to_rollai_links.
156
+ """
157
+
158
+
159
def find_user_suggested_timestamps(
    speakers: str, transcript: str, user_suggestions: str, client: openai.OpenAI
) -> str:
    """
    Use o4-mini to find accurate timestamps for user-suggested clips.

    Builds a single prompt from the speakers, the transcript and the user's
    suggestions, sends it through ``client.responses.create`` and returns the
    model's text output.

    This function never raises: every failure is reported as a string that
    starts with ``"Error finding timestamps:"`` so callers can forward the
    result unchanged.

    Args:
        speakers: Comma-separated list of speaker names
        transcript: The full transcript with timestamps
        user_suggestions: User's suggested clips with estimated timestamps
        client: OpenAI client instance

    Returns:
        String containing found timestamps and clip descriptions, or an
        ``"Error finding timestamps: ..."`` message on failure.
    """
    # Without any suggestion the model has nothing to look up and could only
    # invent clips, so skip the (slow, billed) reasoning call entirely.
    if not user_suggestions or not user_suggestions.strip():
        return "Error finding timestamps: no user suggestions were provided"

    prompt = f"""You are analyzing a transcript to find accurate timestamps for user-suggested clips.

Speakers: {speakers}
Transcript: {transcript}

User Suggestions (with estimated timestamps that may be inaccurate):
{user_suggestions}

Your task:
1. For each user-suggested clip, find the actual timestamp in the transcript where that content occurs
2. The user's timestamps are estimates and may be off by several seconds or minutes
3. Search for the actual content/topic they mentioned and find the precise start and end times
4. Return the corrected timestamps with brief descriptions

Format your response as:
Clip 1: [Topic description] - Start: MM:SS, End: MM:SS, Duration: Xs
Clip 2: [Topic description] - Start: MM:SS, End: MM:SS, Duration: Xs
...

Be precise with timestamps and ensure the clips make sense contextually.
The start and end should make sense with the Topic Description. Minimum Duration of the clip should be 20 seconds and maximum duration should be 120 seconds.
"""

    try:
        response = client.responses.create(
            model="o4-mini",
            reasoning={"effort": "medium"},
            input=[{"role": "user", "content": prompt}],
        )
        return response.output_text
    except Exception as e:
        # Deliberate best-effort: the failure is handed back as text rather
        # than raised, matching the error-string convention described above.
        return f"Error finding timestamps: {str(e)}"
206
+
207
+
208
def format_timestamps_to_rollai_links(
    timestamp_data: str,
    cid: str,
    rsid: str,
    origin: str,
    uid: str,
    link_start: str,
    client: openai.OpenAI,
) -> str:
    """
    Use GPT-4o to convert timestamp data to roll.ai link format.

    This function never raises and always returns a string. Failures raised
    here are reported as ``"Error formatting links: ..."``; an upstream
    ``"Error finding timestamps: ..."`` message is passed through untouched.

    Args:
        timestamp_data: Output from find_user_suggested_timestamps
        cid: Call ID
        rsid: Session ID
        origin: Origin
        uid: User ID
        link_start: Link start URL
        client: OpenAI client instance

    Returns:
        Formatted links in roll.ai style, or an error message string.
    """
    # Nothing to convert: avoid a model call that could only make links up.
    if not timestamp_data or not timestamp_data.strip():
        return "Error formatting links: no timestamp data was provided"

    # find_user_suggested_timestamps reports failures as text with this
    # prefix. Forward that message as-is instead of asking the model to
    # "format" an error into links, which would hide the real failure.
    if timestamp_data.startswith("Error finding timestamps:"):
        return timestamp_data

    prompt = f"""Convert the following timestamp data to roll.ai link format.

Timestamp Data:
{timestamp_data}

Call Details:
- Call ID: {cid}
- Session ID: {rsid}
- Origin: {origin}
- User ID: {uid}
- Link Start: {link_start}

Convert each clip to this exact format:
[Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_seconds}}&et={{end_seconds}}&uid={{uid}})

Notes:
- Convert MM:SS timestamps to total seconds for st= and et= parameters
- Use the topic description as the link text
- Include the duration and start time in the div element
- Replace {{origin}}, {{cid}}, {{rsid}}, {{uid}} with actual values: {origin}, {cid}, {rsid}, {uid}

Example conversion:
"Clip 1: Introduction - Start: 12:30, End: 12:52, Duration: 22s"
becomes:
[Introduction <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{origin}/collab/{cid}/{rsid}?st=750&et=772&uid={uid})

Return only the formatted links, one per line."""

    try:
        response = client.chat.completions.create(
            model="gpt-4o", messages=[{"role": "user", "content": prompt}]
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort: report the failure as text, not a raise.
        return f"Error formatting links: {str(e)}"

    # The message content may be None (e.g. a refusal); keep the declared
    # ``str`` return type so callers can yield the result directly.
    if not content:
        return "Error formatting links: the model returned no content"
    return content
267
+
268
 
269
  def remove_unwanted_prompt(number_of_speakers: int):
270
  if number_of_speakers == 1:
utils.py CHANGED
@@ -110,6 +110,24 @@ openai_tools = [
110
  },
111
  },
112
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  ]
114
 
115
  css = """
 
110
  },
111
  },
112
  },
113
+ {
114
+ "type": "function",
115
+ "function": {
116
+ "name": "find_user_suggested_timestamps",
117
+ "description": "When user provides suggested clips with estimated timestamps, use this function to find accurate timestamps in the transcript. The user's timestamps may be inaccurate since they were noted by someone in the audience.",
118
+ "parameters": {
119
+ "type": "object",
120
+ "properties": {
121
+ "user_suggestions": {
122
+ "type": "string",
123
+ "description": "The user's message containing suggested clips with estimated timestamps",
124
+ }
125
+ },
126
+ "required": ["user_suggestions"],
127
+ "additionalProperties": False,
128
+ },
129
+ },
130
+ },
131
  ]
132
 
133
  css = """