Spaces:
Running
Running
Commit
·
c7b90e4
1
Parent(s):
deeeb78
rm unnecessary comment
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Dict, List, Optional, Tuple
|
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import requests
|
|
|
|
| 8 |
from openai import OpenAI
|
| 9 |
|
| 10 |
street_interview = False
|
|
@@ -30,7 +31,7 @@ class TranscriptProcessor:
|
|
| 30 |
self.speaker_mapping = {}
|
| 31 |
self._load_transcript()
|
| 32 |
self._process_transcript()
|
| 33 |
-
self.map_speaker_ids_to_names()
|
| 34 |
|
| 35 |
def _load_transcript(self) -> None:
|
| 36 |
"""Load the transcript JSON file."""
|
|
@@ -140,7 +141,7 @@ class TranscriptProcessor:
|
|
| 140 |
for segment in self.segments:
|
| 141 |
spk_id = f"spk_{segment.speaker_id}"
|
| 142 |
speaker_name = self.speaker_mapping.get(spk_id, spk_id)
|
| 143 |
-
segment.speaker_name = speaker_name
|
| 144 |
|
| 145 |
# Recreate the formatted transcript with speaker names
|
| 146 |
formatted_segments = []
|
|
@@ -160,7 +161,7 @@ class TranscriptProcessor:
|
|
| 160 |
def correct_speaker_mapping_with_agenda(self, url: str) -> None:
|
| 161 |
"""Fetch agenda from a URL and correct the speaker mapping using OpenAI."""
|
| 162 |
try:
|
| 163 |
-
|
| 164 |
response = requests.get(url)
|
| 165 |
response.raise_for_status()
|
| 166 |
html_content = response.text
|
|
@@ -201,7 +202,6 @@ class TranscriptProcessor:
|
|
| 201 |
"You should only update the name if the name sounds very similar, or there is a good spelling overlap/ The Speaker Introduction matches the description of the Talk from Agends. If the name is totally unrelated, keep the original name."
|
| 202 |
)
|
| 203 |
|
| 204 |
-
# Use OpenAI API to get corrected mapping
|
| 205 |
client = OpenAI()
|
| 206 |
|
| 207 |
completion = client.chat.completions.create(
|
|
|
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import requests
|
| 8 |
+
from bs4 import BeautifulSoup
|
| 9 |
from openai import OpenAI
|
| 10 |
|
| 11 |
street_interview = False
|
|
|
|
| 31 |
self.speaker_mapping = {}
|
| 32 |
self._load_transcript()
|
| 33 |
self._process_transcript()
|
| 34 |
+
self.map_speaker_ids_to_names()
|
| 35 |
|
| 36 |
def _load_transcript(self) -> None:
|
| 37 |
"""Load the transcript JSON file."""
|
|
|
|
| 141 |
for segment in self.segments:
|
| 142 |
spk_id = f"spk_{segment.speaker_id}"
|
| 143 |
speaker_name = self.speaker_mapping.get(spk_id, spk_id)
|
| 144 |
+
segment.speaker_name = speaker_name
|
| 145 |
|
| 146 |
# Recreate the formatted transcript with speaker names
|
| 147 |
formatted_segments = []
|
|
|
|
| 161 |
def correct_speaker_mapping_with_agenda(self, url: str) -> None:
|
| 162 |
"""Fetch agenda from a URL and correct the speaker mapping using OpenAI."""
|
| 163 |
try:
|
| 164 |
+
|
| 165 |
response = requests.get(url)
|
| 166 |
response.raise_for_status()
|
| 167 |
html_content = response.text
|
|
|
|
| 202 |
"You should only update the name if the name sounds very similar, or there is a good spelling overlap/ The Speaker Introduction matches the description of the Talk from Agends. If the name is totally unrelated, keep the original name."
|
| 203 |
)
|
| 204 |
|
|
|
|
| 205 |
client = OpenAI()
|
| 206 |
|
| 207 |
completion = client.chat.completions.create(
|