Spaces:

RollAI
/

ChatWithTranscript

Running

App Files Files Community

AhmadMustafa committed on Jun 16

Commit

40ddc76

1 Parent(s): 21cd05d

add: o3

Browse files

Files changed (3) hide show

app.py +56 -0
prompts.py +107 -22
utils.py +18 -0

app.py CHANGED Viewed

@@ -21,6 +21,7 @@ from utils import openai_tools as tools
 client = OpenAI()
 def get_initial_analysis(
     transcript_processor: TranscriptProcessor, cid, rsid, origin, ct, uid
 ) -> Generator[str, None, None]:
@@ -414,6 +415,61 @@ def chat(
                             messages.append(function_call_result_message)
                             yield error_msg
                             return
                 break  # Exit streaming loop if tool calls detected
             if not tool_calls_detected and chunk.choices[0].delta.content is not None:

 client = OpenAI()
 def get_initial_analysis(
     transcript_processor: TranscriptProcessor, cid, rsid, origin, ct, uid
 ) -> Generator[str, None, None]:
                             messages.append(function_call_result_message)
                             yield error_msg
                             return
+                    elif tool_call.function.name == "find_user_suggested_timestamps":
+                        args = eval(tool_call.function.arguments)
+                        user_suggestions = args.get("user_suggestions", "")
+                        try:
+                            # Get data needed for the functions
+                            speakers = ", ".join(speaker_mapping.values())
+                            transcript = transcript_processor.get_transcript()
+                            # Import the functions
+                            from prompts import find_user_suggested_timestamps, format_timestamps_to_rollai_links
+                            # Use o4-mini to find accurate timestamps
+                            timestamp_data = find_user_suggested_timestamps(
+                                speakers=speakers,
+                                transcript=transcript,
+                                user_suggestions=user_suggestions,
+                                client=client
+                            )
+                            # Use GPT-4o to format to roll.ai links
+                            formatted_links = format_timestamps_to_rollai_links(
+                                timestamp_data=timestamp_data,
+                                cid=cid,
+                                rsid=rsid,
+                                origin=origin,
+                                uid=uid,
+                                link_start=link_start,
+                                client=client
+                            )
+                            function_call_result_message = {
+                                "role": "tool",
+                                "content": formatted_links,
+                                "name": tool_call.function.name,
+                                "tool_call_id": tool_call.id,
+                            }
+                            messages.append(function_call_result_message)
+                            yield formatted_links
+                            return
+                        except Exception as e:
+                            error_msg = f"Error processing user suggestions: {str(e)}"
+                            function_call_result_message = {
+                                "role": "tool",
+                                "content": error_msg,
+                                "name": tool_call.function.name,
+                                "tool_call_id": tool_call.id,
+                            }
+                            messages.append(function_call_result_message)
+                            yield error_msg
+                            return
                 break  # Exit streaming loop if tool calls detected
             if not tool_calls_detected and chunk.choices[0].delta.content is not None:

prompts.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from typing import Dict
 def get_street_interview_prompt(
     transcript: str, uid: str, rsid: str, link_start: str
@@ -124,8 +126,6 @@ Call Type is {ct}.
 Speakers: {", ".join(speaker_mapping.values())}
 Transcript: {transcript}
-IMPORTANT: If the user suggests clips along with timestamps, the clips are correct but the timings can differ slightly from the actual transcript because the user was in the audience and does not have exact timestamps. You need to find the correct timestamp in the transcript and verify it instead of relying on the user's timestamps.
 If a user asks timestamps for a specific topic or things, find the start time and end time of that specific topic and return answer in the format:
 Answers and URLs should be formated as follows:
 [Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{772}}&uid={{uid}})
@@ -152,34 +152,119 @@ Provide the exact timestamp where the person begins their introduction, typicall
 If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
 If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
-IMPORTANT TIMESTAMP VERIFICATION INSTRUCTIONS:
-When a user provides clips with timestamps, the starting timestamp may be slightly inaccurate since they were noted by someone in the audience without access to exact timing data. You MUST verify and correct these timestamps by searching the actual transcript.
-For example, user says 15:25 "speaker 1 talks about mark zuckerberg" but the actual transcript shows that sentence starts at 15:30. You must return the corrected timestamp as 15:30.
-Required Process for Each User-Provided Timestamp:
-For every timestamp the user suggests, you must:
-Use <THINKING> tags to analyze each timestamp
-Inside the thinking tags:
-Quote the starting sentence the user claims begins at their suggested timestamp
-Search the transcript for where that sentence actually appears
-Compare the user's timestamp with the actual timestamp in the transcript
-Note any discrepancies and identify the correct start time
-Return your final answer with the corrected timestamps and properly formatted links
-Example format:
-<THINKING>
-User claims the clip starts at 15:25 with discussion on Mark Zuckerberg.
-Let me search the transcript for this sentence. The discussion on Mark Zuckerberg actually starts at 15:47 for 35 seconds.
-User claims the clip starts at 12:00 with discussion on Dinner.
-Let me search the transcript for this sentence. The discussion on Dinner actually starts at 11:30 for 20seconds.
-</THINKING>
-[Continue with your response using the verified timestamp in the correct URL format as shown above.]
 """
 def remove_unwanted_prompt(number_of_speakers: int):
     if number_of_speakers == 1:

 from typing import Dict
+import openai
 def get_street_interview_prompt(
     transcript: str, uid: str, rsid: str, link_start: str
 Speakers: {", ".join(speaker_mapping.values())}
 Transcript: {transcript}
 If a user asks timestamps for a specific topic or things, find the start time and end time of that specific topic and return answer in the format:
 Answers and URLs should be formated as follows:
 [Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{750}}&et={{772}}&uid={{uid}})
 If the user provides a link to the agenda, use the correct_speaker_name_with_url function to correct the speaker names based on the agenda.
 If the user provides the correct call type, use the correct_call_type function to correct the call type. Call Type for street interviews is 'si'.
+For user-suggested clips with timestamps, use the find_user_suggested_timestamps function to get accurate timestamps, then format them with format_timestamps_to_rollai_links.
+"""
+def find_user_suggested_timestamps(
+    speakers: str, transcript: str, user_suggestions: str, client: openai.OpenAI
+) -> str:
+    """
+    Use o4-mini to find accurate timestamps for user-suggested clips.
+    Builds one prompt from the speakers, the transcript and the user's
+    suggestions, sends it through the Responses API with medium reasoning
+    effort, and returns the model's text output as-is (it is not parsed or
+    validated here).
+    Args:
+        speakers: Comma-separated list of speaker names
+        transcript: The full transcript with timestamps
+        user_suggestions: User's suggested clips with estimated timestamps
+        client: OpenAI client instance
+    Returns:
+        String containing found timestamps and clip descriptions; the prompt
+        asks for one "Clip N: [Topic] - Start: MM:SS, End: MM:SS, Duration: Xs"
+        line per clip. If the API call raises, the exception is not
+        propagated: a string starting with "Error finding timestamps: " is
+        returned instead, so callers must inspect the text to detect failure.
+    """
+    # The transcript and the user's suggestions are interpolated verbatim into
+    # the prompt below.
+    prompt = f"""You are analyzing a transcript to find accurate timestamps for user-suggested clips.
+Speakers: {speakers}
+Transcript: {transcript}
+User Suggestions (with estimated timestamps that may be inaccurate):
+{user_suggestions}
+Your task:
+1. For each user-suggested clip, find the actual timestamp in the transcript where that content occurs
+2. The user's timestamps are estimates and may be off by several seconds or minutes
+3. Search for the actual content/topic they mentioned and find the precise start and end times
+4. Return the corrected timestamps with brief descriptions
+Format your response as:
+Clip 1: [Topic description] - Start: MM:SS, End: MM:SS, Duration: Xs
+Clip 2: [Topic description] - Start: MM:SS, End: MM:SS, Duration: Xs
+...
+Be precise with timestamps and ensure the clips make sense contextually.
+The start and end should make sense with the Topic Description. Minimum Duration of the clip should be 20 seconds and maximum duration should be 120 seconds.
 """
+    try:
+        # Single-turn request: the whole prompt is sent as one user message.
+        response = client.responses.create(
+            model="o4-mini",
+            reasoning={"effort": "medium"},
+            input=[{"role": "user", "content": prompt}],
+        )
+        return response.output_text
+    except Exception as e:
+        # Failures are reported in-band as text rather than raised.
+        return f"Error finding timestamps: {str(e)}"
+def format_timestamps_to_rollai_links(
+    timestamp_data: str,
+    cid: str,
+    rsid: str,
+    origin: str,
+    uid: str,
+    link_start: str,
+    client: openai.OpenAI,
+) -> str:
+    """
+    Use GPT-4o to convert timestamp data to roll.ai link format.
+    Args:
+        timestamp_data: Output from find_user_suggested_timestamps
+        cid: Call ID
+        rsid: Session ID
+        origin: Origin
+        uid: User ID
+        link_start: Link start URL
+        client: OpenAI client instance
+    Returns:
+        Formatted links in roll.ai style
+    """
+    prompt = f"""Convert the following timestamp data to roll.ai link format.
+Timestamp Data:
+{timestamp_data}
+Call Details:
+- Call ID: {cid}
+- Session ID: {rsid}
+- Origin: {origin}
+- User ID: {uid}
+- Link Start: {link_start}
+Convert each clip to this exact format:
+[Topic title <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{{origin}}/collab/{{cid}}/{{rsid}}?st={{start_seconds}}&et={{end_seconds}}&uid={{uid}})
+Notes:
+- Convert MM:SS timestamps to total seconds for st= and et= parameters
+- Use the topic description as the link text
+- Include the duration and start time in the div element
+- Replace {{origin}}, {{cid}}, {{rsid}}, {{uid}} with actual values: {origin}, {cid}, {rsid}, {uid}
+Example conversion:
+"Clip 1: Introduction - Start: 12:30, End: 12:52, Duration: 22s"
+becomes:
+[Introduction <div id='topic' style="display: inline"> 22s at 12:30 </div>]({link_start}://{origin}/collab/{cid}/{rsid}?st=750&et=772&uid={uid})
+Return only the formatted links, one per line."""
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4o", messages=[{"role": "user", "content": prompt}]
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        return f"Error formatting links: {str(e)}"
 def remove_unwanted_prompt(number_of_speakers: int):
     if number_of_speakers == 1:

utils.py CHANGED Viewed

@@ -110,6 +110,24 @@ openai_tools = [
             },
         },
     },
 ]
 css = """

             },
         },
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "find_user_suggested_timestamps",
+            "description": "When user provides suggested clips with estimated timestamps, use this function to find accurate timestamps in the transcript. The user's timestamps may be inaccurate since they were noted by someone in the audience.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "user_suggestions": {
+                        "type": "string",
+                        "description": "The user's message containing suggested clips with estimated timestamps",
+                    }
+                },
+                "required": ["user_suggestions"],
+                "additionalProperties": False,
+            },
+        },
+    },
 ]
 css = """