Spaces:
Running
Running
Commit
·
c7b90e4
1
Parent(s):
deeeb78
rm unnecessary comment
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Dict, List, Optional, Tuple
|
|
5 |
|
6 |
import gradio as gr
|
7 |
import requests
|
|
|
8 |
from openai import OpenAI
|
9 |
|
10 |
street_interview = False
|
@@ -30,7 +31,7 @@ class TranscriptProcessor:
|
|
30 |
self.speaker_mapping = {}
|
31 |
self._load_transcript()
|
32 |
self._process_transcript()
|
33 |
-
self.map_speaker_ids_to_names()
|
34 |
|
35 |
def _load_transcript(self) -> None:
|
36 |
"""Load the transcript JSON file."""
|
@@ -140,7 +141,7 @@ class TranscriptProcessor:
|
|
140 |
for segment in self.segments:
|
141 |
spk_id = f"spk_{segment.speaker_id}"
|
142 |
speaker_name = self.speaker_mapping.get(spk_id, spk_id)
|
143 |
-
segment.speaker_name = speaker_name
|
144 |
|
145 |
# Recreate the formatted transcript with speaker names
|
146 |
formatted_segments = []
|
@@ -160,7 +161,7 @@ class TranscriptProcessor:
|
|
160 |
def correct_speaker_mapping_with_agenda(self, url: str) -> None:
|
161 |
"""Fetch agenda from a URL and correct the speaker mapping using OpenAI."""
|
162 |
try:
|
163 |
-
|
164 |
response = requests.get(url)
|
165 |
response.raise_for_status()
|
166 |
html_content = response.text
|
@@ -201,7 +202,6 @@ class TranscriptProcessor:
|
|
201 |
"You should only update the name if the name sounds very similar, or there is a good spelling overlap/ The Speaker Introduction matches the description of the Talk from Agends. If the name is totally unrelated, keep the original name."
|
202 |
)
|
203 |
|
204 |
-
# Use OpenAI API to get corrected mapping
|
205 |
client = OpenAI()
|
206 |
|
207 |
completion = client.chat.completions.create(
|
|
|
5 |
|
6 |
import gradio as gr
|
7 |
import requests
|
8 |
+
from bs4 import BeautifulSoup
|
9 |
from openai import OpenAI
|
10 |
|
11 |
street_interview = False
|
|
|
31 |
self.speaker_mapping = {}
|
32 |
self._load_transcript()
|
33 |
self._process_transcript()
|
34 |
+
self.map_speaker_ids_to_names()
|
35 |
|
36 |
def _load_transcript(self) -> None:
|
37 |
"""Load the transcript JSON file."""
|
|
|
141 |
for segment in self.segments:
|
142 |
spk_id = f"spk_{segment.speaker_id}"
|
143 |
speaker_name = self.speaker_mapping.get(spk_id, spk_id)
|
144 |
+
segment.speaker_name = speaker_name
|
145 |
|
146 |
# Recreate the formatted transcript with speaker names
|
147 |
formatted_segments = []
|
|
|
161 |
def correct_speaker_mapping_with_agenda(self, url: str) -> None:
|
162 |
"""Fetch agenda from a URL and correct the speaker mapping using OpenAI."""
|
163 |
try:
|
164 |
+
|
165 |
response = requests.get(url)
|
166 |
response.raise_for_status()
|
167 |
html_content = response.text
|
|
|
202 |
"You should only update the name if the name sounds very similar, or there is a good spelling overlap/ The Speaker Introduction matches the description of the Talk from Agends. If the name is totally unrelated, keep the original name."
|
203 |
)
|
204 |
|
|
|
205 |
client = OpenAI()
|
206 |
|
207 |
completion = client.chat.completions.create(
|