Spaces:
Running
Running
Upload 39 files
Browse files
App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py
CHANGED
@@ -12,6 +12,9 @@ from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
|
|
12 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
13 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
|
14 |
from App_Function_Libraries.Utils.Utils import cleanup_temp_files
|
|
|
|
|
|
|
15 |
#
|
16 |
#######################################################################################################################
|
17 |
# Functions:
|
|
|
12 |
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
|
13 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
|
14 |
from App_Function_Libraries.Utils.Utils import cleanup_temp_files
|
15 |
+
# Import metrics logging
|
16 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
17 |
+
from App_Function_Libraries.Metrics.logger_config import logger
|
18 |
#
|
19 |
#######################################################################################################################
|
20 |
# Functions:
|
App_Function_Libraries/Gradio_UI/Live_Recording.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Live_Recording.py
|
2 |
+
# Description: Gradio UI for live audio recording and transcription.
|
3 |
+
#
|
4 |
+
# Import necessary modules and functions
|
5 |
+
import logging
|
6 |
+
import os
|
7 |
+
import time
|
8 |
+
|
9 |
+
# External Imports
|
10 |
+
import gradio as gr
|
11 |
+
# Local Imports
|
12 |
+
from App_Function_Libraries.Audio.Audio_Transcription_Lib import (record_audio, speech_to_text, save_audio_temp,
|
13 |
+
stop_recording)
|
14 |
+
from App_Function_Libraries.DB.DB_Manager import add_media_to_database
|
15 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
16 |
+
#
|
17 |
+
#######################################################################################################################
|
18 |
+
#
|
19 |
+
# Functions:
|
20 |
+
|
21 |
+
# Whisper model names offered in the "Whisper Model" dropdown.
# Each name appears exactly once so the UI does not show duplicate choices
# ("medium" was previously listed twice).
whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
                  "distil-large-v2", "distil-medium.en", "distil-small.en"]
|
23 |
+
|
24 |
+
def create_live_recording_tab():
    """Build the "Live Recording and Transcription" Gradio tab.

    Creates the recording controls (duration, Whisper model, VAD filter,
    save options), the transcription/audio outputs, and wires the button
    callbacks:

    * "Start Recording" calls ``record_audio`` and stores the returned
      handles in a ``gr.State``.
    * "Stop Recording" stops the recording, transcribes it with
      ``speech_to_text`` and clears the stored handles.
    * "Save to Database" stores the current transcription text via
      ``add_media_to_database``.

    Returns:
        None. The components are created as a side effect.
    """
    with gr.Tab("Live Recording and Transcription"):
        gr.Markdown("# Live Audio Recording and Transcription")
        with gr.Row():
            with gr.Column():
                duration = gr.Slider(minimum=1, maximum=8000, value=15, label="Recording Duration (seconds)")
                whisper_models_input = gr.Dropdown(choices=whisper_models, value="medium", label="Whisper Model")
                vad_filter = gr.Checkbox(label="Use VAD Filter")
                save_recording = gr.Checkbox(label="Save Recording")
                save_to_db = gr.Checkbox(label="Save Transcription to Database(Must be checked to save - can be checked after transcription)", value=False)
                custom_title = gr.Textbox(label="Custom Title (for database)", visible=False)
                record_button = gr.Button("Start Recording")
                stop_button = gr.Button("Stop Recording")
            with gr.Column():
                output = gr.Textbox(label="Transcription", lines=10)
                # NOTE(review): this component is created hidden and nothing in this
                # tab makes it visible, so a saved recording is never shown - confirm intent.
                audio_output = gr.Audio(label="Recorded Audio", visible=False)

        # Holds the (p, stream, audio_queue, stop_event, audio_thread) tuple of the
        # recording in progress, or None when nothing is being recorded.
        recording_state = gr.State(value=None)

        def start_recording(duration):
            """Start a recording of ``duration`` seconds and return its handles."""
            log_counter("live_recording_start_attempt", labels={"duration": duration})
            p, stream, audio_queue, stop_event, audio_thread = record_audio(duration)
            log_counter("live_recording_start_success", labels={"duration": duration})
            return (p, stream, audio_queue, stop_event, audio_thread)

        def _remove_temp_file(path):
            """Best-effort removal of a temporary audio file; failures are only logged."""
            try:
                os.remove(path)
            except OSError as e:
                logging.warning(f"Could not remove temporary audio file {path}: {str(e)}")

        def end_recording_and_transcribe(recording_state, whisper_model, vad_filter, save_recording, save_to_db, custom_title):
            """Stop the active recording and transcribe it.

            ``save_to_db`` and ``custom_title`` are accepted because the click
            handler passes them, but they are not used here; saving happens in
            ``save_transcription_to_db``.

            Returns:
                A 3-tuple ``(text, audio_path_or_None, None)``. ``text`` is the
                transcription or an error message; the trailing ``None`` resets
                the stored recording state so a repeated "Stop Recording" click
                does not try to stop the same recording twice.
            """
            log_counter("live_recording_end_attempt", labels={"model": whisper_model})
            start_time = time.time()

            if recording_state is None:
                log_counter("live_recording_end_error", labels={"error": "Recording hasn't started yet"})
                return "Recording hasn't started yet.", None, None

            temp_file = None
            try:
                p, stream, audio_queue, stop_event, audio_thread = recording_state
                audio_data = stop_recording(p, stream, audio_queue, stop_event, audio_thread)

                temp_file = save_audio_temp(audio_data)
                segments = speech_to_text(temp_file, whisper_model=whisper_model, vad_filter=vad_filter)
                transcription = "\n".join([segment["Text"] for segment in segments])
            except Exception as e:
                # Report the failure instead of letting it propagate, and do not
                # leave an unwanted temp file behind.
                logging.error(f"Error during live recording transcription: {str(e)}", exc_info=True)
                log_counter("live_recording_end_error", labels={"error": str(e)})
                kept_file = None
                if temp_file is not None:
                    if save_recording:
                        kept_file = temp_file
                    else:
                        _remove_temp_file(temp_file)
                return f"Error during transcription: {str(e)}", kept_file, None

            if save_recording:
                log_counter("live_recording_saved", labels={"model": whisper_model})
            else:
                # A cleanup failure must not discard the finished transcription.
                _remove_temp_file(temp_file)

            end_time = time.time() - start_time
            log_histogram("live_recording_end_duration", end_time, labels={"model": whisper_model})
            log_counter("live_recording_end_success", labels={"model": whisper_model})
            return transcription, temp_file if save_recording else None, None

        def save_transcription_to_db(transcription, custom_title):
            """Store ``transcription`` in the media database and return a status message.

            An empty or missing ``custom_title`` falls back to
            "Self-recorded Audio". An empty transcription is rejected rather
            than stored.
            """
            log_counter("save_transcription_to_db_attempt")
            start_time = time.time()

            if not transcription or not transcription.strip():
                log_counter("save_transcription_to_db_error", labels={"error": "No transcription to save"})
                return "Error saving transcription to database: no transcription available to save."

            # The textbox value may be None; treat that like an empty title.
            if (custom_title or "").strip() == "":
                custom_title = "Self-recorded Audio"

            try:
                url = "self_recorded"
                info_dict = {
                    "title": custom_title,
                    "uploader": "self-recorded",
                    "webpage_url": url
                }
                segments = [{"Text": transcription}]
                summary = ""
                keywords = ["self-recorded", "audio"]
                custom_prompt_input = ""
                whisper_model = "self-recorded"
                media_type = "audio"

                result = add_media_to_database(
                    url=url,
                    info_dict=info_dict,
                    segments=segments,
                    summary=summary,
                    keywords=keywords,
                    custom_prompt_input=custom_prompt_input,
                    whisper_model=whisper_model,
                    media_type=media_type
                )
                end_time = time.time() - start_time
                log_histogram("save_transcription_to_db_duration", end_time)
                log_counter("save_transcription_to_db_success")
                return f"Transcription saved to database successfully. {result}"
            except Exception as e:
                logging.error(f"Error saving transcription to database: {str(e)}", exc_info=True)
                log_counter("save_transcription_to_db_error", labels={"error": str(e)})
                return f"Error saving transcription to database: {str(e)}"

        def update_custom_title_visibility(save_to_db):
            """Show the custom-title textbox only while "save to database" is checked."""
            return gr.update(visible=save_to_db)

        record_button.click(
            fn=start_recording,
            inputs=[duration],
            outputs=[recording_state]
        )

        stop_button.click(
            fn=end_recording_and_transcribe,
            inputs=[recording_state, whisper_models_input, vad_filter, save_recording, save_to_db, custom_title],
            # recording_state is an output so the handler can reset it to None.
            outputs=[output, audio_output, recording_state]
        )

        save_to_db.change(
            fn=update_custom_title_visibility,
            inputs=[save_to_db],
            outputs=[custom_title]
        )

        gr.Button("Save to Database").click(
            fn=save_transcription_to_db,
            inputs=[output, custom_title],
            outputs=gr.Textbox(label="Database Save Status")
        )
|
139 |
+
|
140 |
+
#
|
141 |
+
# End of Functions
|
142 |
+
########################################################################################################################
|
App_Function_Libraries/Gradio_UI/Podcast_tab.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3 |
#
|
4 |
# Imports
|
5 |
#
|
6 |
-
#
|
7 |
# External Imports
|
8 |
import gradio as gr
|
9 |
#
|
@@ -11,8 +10,6 @@ import gradio as gr
|
|
11 |
from App_Function_Libraries.Audio.Audio_Files import process_podcast
|
12 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
|
13 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
14 |
-
|
15 |
-
|
16 |
#
|
17 |
########################################################################################################################
|
18 |
#
|
|
|
3 |
#
|
4 |
# Imports
|
5 |
#
|
|
|
6 |
# External Imports
|
7 |
import gradio as gr
|
8 |
#
|
|
|
10 |
from App_Function_Libraries.Audio.Audio_Files import process_podcast
|
11 |
from App_Function_Libraries.DB.DB_Manager import load_preset_prompts
|
12 |
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models, update_user_prompt
|
|
|
|
|
13 |
#
|
14 |
########################################################################################################################
|
15 |
#
|
App_Function_Libraries/Gradio_UI/Video_transcription_tab.py
CHANGED
@@ -5,6 +5,7 @@
|
|
5 |
import json
|
6 |
import logging
|
7 |
import os
|
|
|
8 |
from typing import Dict, Any
|
9 |
|
10 |
#
|
@@ -23,6 +24,8 @@ from App_Function_Libraries.Utils.Utils import convert_to_seconds, safe_read_fil
|
|
23 |
create_download_directory, generate_unique_identifier, extract_text_from_segments
|
24 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
25 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
|
|
|
|
26 |
#
|
27 |
#######################################################################################################################
|
28 |
#
|
@@ -194,6 +197,8 @@ def create_video_transcription_tab():
|
|
194 |
timestamp_option, keep_original_video, summarize_recursively, overwrite_existing=False,
|
195 |
progress: gr.Progress = gr.Progress()) -> tuple:
|
196 |
try:
|
|
|
|
|
197 |
# FIXME - summarize_recursively is not being used...
|
198 |
logging.info("Entering process_videos_with_error_handling")
|
199 |
logging.info(f"Received inputs: {inputs}")
|
@@ -245,11 +250,17 @@ def create_video_transcription_tab():
|
|
245 |
all_transcriptions = {}
|
246 |
all_summaries = ""
|
247 |
|
|
|
|
|
|
|
|
|
248 |
for i in range(0, len(all_inputs), batch_size):
|
249 |
batch = all_inputs[i:i + batch_size]
|
250 |
batch_results = []
|
251 |
|
252 |
for input_item in batch:
|
|
|
|
|
253 |
try:
|
254 |
start_seconds = convert_to_seconds(start_time)
|
255 |
end_seconds = convert_to_seconds(end_time) if end_time else None
|
@@ -318,6 +329,14 @@ def create_video_transcription_tab():
|
|
318 |
batch_results.append(
|
319 |
(input_item, error_message, "Error", video_metadata, None, None))
|
320 |
errors.append(f"Error processing {input_item}: {error_message}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
else:
|
322 |
url, transcription, summary, json_file, summary_file, result_metadata = result
|
323 |
if transcription is None:
|
@@ -325,13 +344,56 @@ def create_video_transcription_tab():
|
|
325 |
batch_results.append(
|
326 |
(input_item, error_message, "Error", result_metadata, None, None))
|
327 |
errors.append(error_message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
else:
|
329 |
batch_results.append(
|
330 |
(input_item, transcription, "Success", result_metadata, json_file,
|
331 |
summary_file))
|
332 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
error_message = f"Error processing {input_item}: {str(e)}"
|
336 |
logging.error(error_message, exc_info=True)
|
337 |
batch_results.append((input_item, error_message, "Error", {}, None, None))
|
@@ -409,6 +471,16 @@ def create_video_transcription_tab():
|
|
409 |
error_summary = "\n".join(errors) if errors else "No errors occurred."
|
410 |
|
411 |
total_inputs = len(all_inputs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
return (
|
413 |
f"Processed {total_inputs} videos. {len(errors)} errors occurred.",
|
414 |
error_summary,
|
@@ -418,6 +490,14 @@ def create_video_transcription_tab():
|
|
418 |
)
|
419 |
except Exception as e:
|
420 |
logging.error(f"Unexpected error in process_videos_with_error_handling: {str(e)}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
return (
|
422 |
f"An unexpected error occurred: {str(e)}",
|
423 |
str(e),
|
|
|
5 |
import json
|
6 |
import logging
|
7 |
import os
|
8 |
+
from datetime import datetime
|
9 |
from typing import Dict, Any
|
10 |
|
11 |
#
|
|
|
24 |
create_download_directory, generate_unique_identifier, extract_text_from_segments
|
25 |
from App_Function_Libraries.Video_DL_Ingestion_Lib import parse_and_expand_urls, extract_metadata, download_video
|
26 |
from App_Function_Libraries.Benchmarks_Evaluations.ms_g_eval import run_geval
|
27 |
+
# Import metrics logging
|
28 |
+
from App_Function_Libraries.Metrics.metrics_logger import log_counter, log_histogram
|
29 |
#
|
30 |
#######################################################################################################################
|
31 |
#
|
|
|
197 |
timestamp_option, keep_original_video, summarize_recursively, overwrite_existing=False,
|
198 |
progress: gr.Progress = gr.Progress()) -> tuple:
|
199 |
try:
|
200 |
+
# Start overall processing timer
|
201 |
+
proc_start_time = datetime.utcnow()
|
202 |
# FIXME - summarize_recursively is not being used...
|
203 |
logging.info("Entering process_videos_with_error_handling")
|
204 |
logging.info(f"Received inputs: {inputs}")
|
|
|
250 |
all_transcriptions = {}
|
251 |
all_summaries = ""
|
252 |
|
253 |
+
# Start timing
|
254 |
+
# FIXME - utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
|
255 |
+
start_proc = datetime.utcnow()
|
256 |
+
|
257 |
for i in range(0, len(all_inputs), batch_size):
|
258 |
batch = all_inputs[i:i + batch_size]
|
259 |
batch_results = []
|
260 |
|
261 |
for input_item in batch:
|
262 |
+
# Start individual video processing timer
|
263 |
+
video_start_time = datetime.utcnow()
|
264 |
try:
|
265 |
start_seconds = convert_to_seconds(start_time)
|
266 |
end_seconds = convert_to_seconds(end_time) if end_time else None
|
|
|
329 |
batch_results.append(
|
330 |
(input_item, error_message, "Error", video_metadata, None, None))
|
331 |
errors.append(f"Error processing {input_item}: {error_message}")
|
332 |
+
|
333 |
+
# Log failure metric
|
334 |
+
log_counter(
|
335 |
+
metric_name="videos_failed_total",
|
336 |
+
labels={"whisper_model": whisper_model, "api_name": api_name},
|
337 |
+
value=1
|
338 |
+
)
|
339 |
+
|
340 |
else:
|
341 |
url, transcription, summary, json_file, summary_file, result_metadata = result
|
342 |
if transcription is None:
|
|
|
344 |
batch_results.append(
|
345 |
(input_item, error_message, "Error", result_metadata, None, None))
|
346 |
errors.append(error_message)
|
347 |
+
|
348 |
+
# Log failure metric
|
349 |
+
log_counter(
|
350 |
+
metric_name="videos_failed_total",
|
351 |
+
labels={"whisper_model": whisper_model, "api_name": api_name},
|
352 |
+
value=1
|
353 |
+
)
|
354 |
+
|
355 |
else:
|
356 |
batch_results.append(
|
357 |
(input_item, transcription, "Success", result_metadata, json_file,
|
358 |
summary_file))
|
359 |
|
360 |
+
# Log success metric
|
361 |
+
log_counter(
|
362 |
+
metric_name="videos_processed_total",
|
363 |
+
labels={"whisper_model": whisper_model, "api_name": api_name},
|
364 |
+
value=1
|
365 |
+
)
|
366 |
+
|
367 |
+
# Calculate processing time
|
368 |
+
video_end_time = datetime.utcnow()
|
369 |
+
processing_time = (video_end_time - video_start_time).total_seconds()
|
370 |
+
log_histogram(
|
371 |
+
metric_name="video_processing_time_seconds",
|
372 |
+
value=processing_time,
|
373 |
+
labels={"whisper_model": whisper_model, "api_name": api_name}
|
374 |
+
)
|
375 |
+
|
376 |
+
# Log transcription and summary metrics
|
377 |
+
if transcription:
|
378 |
+
log_counter(
|
379 |
+
metric_name="transcriptions_generated_total",
|
380 |
+
labels={"whisper_model": whisper_model},
|
381 |
+
value=1
|
382 |
+
)
|
383 |
+
if summary:
|
384 |
+
log_counter(
|
385 |
+
metric_name="summaries_generated_total",
|
386 |
+
labels={"whisper_model": whisper_model},
|
387 |
+
value=1
|
388 |
+
)
|
389 |
|
390 |
except Exception as e:
|
391 |
+
# Log failure
|
392 |
+
log_counter(
|
393 |
+
metric_name="videos_failed_total",
|
394 |
+
labels={"whisper_model": whisper_model, "api_name": api_name},
|
395 |
+
value=1
|
396 |
+
)
|
397 |
error_message = f"Error processing {input_item}: {str(e)}"
|
398 |
logging.error(error_message, exc_info=True)
|
399 |
batch_results.append((input_item, error_message, "Error", {}, None, None))
|
|
|
471 |
error_summary = "\n".join(errors) if errors else "No errors occurred."
|
472 |
|
473 |
total_inputs = len(all_inputs)
|
474 |
+
|
475 |
+
# End overall processing timer
|
476 |
+
proc_end_time = datetime.utcnow()
|
477 |
+
total_processing_time = (proc_end_time - proc_start_time).total_seconds()
|
478 |
+
log_histogram(
|
479 |
+
metric_name="total_processing_time_seconds",
|
480 |
+
value=total_processing_time,
|
481 |
+
labels={"whisper_model": whisper_model, "api_name": api_name}
|
482 |
+
)
|
483 |
+
|
484 |
return (
|
485 |
f"Processed {total_inputs} videos. {len(errors)} errors occurred.",
|
486 |
error_summary,
|
|
|
490 |
)
|
491 |
except Exception as e:
|
492 |
logging.error(f"Unexpected error in process_videos_with_error_handling: {str(e)}", exc_info=True)
|
493 |
+
|
494 |
+
# Log unexpected failure metric
|
495 |
+
log_counter(
|
496 |
+
metric_name="videos_failed_total",
|
497 |
+
labels={"whisper_model": whisper_model, "api_name": api_name},
|
498 |
+
value=1
|
499 |
+
)
|
500 |
+
|
501 |
return (
|
502 |
f"An unexpected error occurred: {str(e)}",
|
503 |
str(e),
|