oceansweep committed on
Commit
8d1d1bc
·
verified ·
1 Parent(s): 9db5a77

Update App_Function_Libraries/Audio_Files.py

Browse files
Files changed (1) hide show
  1. App_Function_Libraries/Audio_Files.py +691 -628
App_Function_Libraries/Audio_Files.py CHANGED
@@ -1,629 +1,692 @@
1
- # Audio_Files.py
2
- #########################################
3
- # Audio Processing Library
4
- # This library is used to download or load audio files from a local directory.
5
- #
6
- ####
7
- #
8
- # Functions:
9
- #
10
- # download_audio_file(url, use_cookies=False, cookies=None)
11
- # process_audio(audio_file_path, num_speakers=2, whisper_model="small.en", ...)
12
- # process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key, ...)
13
- #
14
- #
15
- #########################################
16
- # Imports
17
- import json
18
- import logging
19
- import subprocess
20
- import sys
21
- import tempfile
22
- import uuid
23
- from datetime import datetime
24
-
25
- import requests
26
- import os
27
- from gradio import gradio
28
- import yt_dlp
29
-
30
- from App_Function_Libraries.Audio_Transcription_Lib import speech_to_text
31
- from App_Function_Libraries.Chunk_Lib import improved_chunking_process
32
- #
33
- # Local Imports
34
- from App_Function_Libraries.SQLite_DB import add_media_to_database, add_media_with_keywords
35
- from App_Function_Libraries.Utils import create_download_directory, save_segments_to_json
36
- from App_Function_Libraries.Summarization_General_Lib import save_transcription_and_summary, perform_transcription, \
37
- perform_summarization
38
- from App_Function_Libraries.Video_DL_Ingestion_Lib import extract_metadata
39
-
40
- #
41
- #######################################################################################################################
42
- # Function Definitions
43
- #
44
-
45
- MAX_FILE_SIZE = 500 * 1024 * 1024
46
-
47
-
48
def download_audio_file(url, use_cookies=False, cookies=None):
    """Download an audio file from ``url`` into the local ``downloads`` directory.

    Args:
        url: Direct URL of the audio file.
        use_cookies: When true and ``cookies`` is non-empty, the cookies are
            sent with the request as a ``Cookie`` header.
        cookies: JSON object (as a string) mapping cookie names to values.
            Anything that cannot be parsed into a mapping is ignored with a warning.

    Returns:
        Path of the saved file, ``downloads/audio_<8 hex chars>.mp3``.

    Raises:
        requests.RequestException: On connection errors, timeouts or a bad HTTP status.
        ValueError: If the file is larger than ``MAX_FILE_SIZE``.
    """
    size_error = f"File size exceeds the {MAX_FILE_SIZE // (1024 * 1024)}MB limit."
    save_path = None
    downloaded = False
    try:
        # Set up the request headers
        headers = {}
        if use_cookies and cookies:
            try:
                cookie_dict = json.loads(cookies)
                headers['Cookie'] = '; '.join(f'{k}={v}' for k, v in cookie_dict.items())
            except (json.JSONDecodeError, AttributeError, TypeError):
                # AttributeError/TypeError: valid JSON that is not an object, or a non-string value.
                logging.warning("Invalid cookie format. Proceeding without cookies.")

        # A timeout keeps a stalled server from hanging the caller forever;
        # the context manager releases the connection on every exit path.
        with requests.get(url, headers=headers, stream=True, timeout=(10, 60)) as response:
            # Raise an exception for bad status codes
            response.raise_for_status()

            # Reject early when the server announces an oversized body
            declared_size = int(response.headers.get('content-length', 0))
            if declared_size > MAX_FILE_SIZE:
                raise ValueError(size_error)

            # Generate a unique filename inside the downloads directory
            os.makedirs('downloads', exist_ok=True)
            save_path = os.path.join('downloads', f"audio_{uuid.uuid4().hex[:8]}.mp3")

            bytes_written = 0
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    # Content-Length may be absent or wrong, so enforce the limit on real bytes too.
                    bytes_written += len(chunk)
                    if bytes_written > MAX_FILE_SIZE:
                        raise ValueError(size_error)
                    f.write(chunk)

        downloaded = True
        logging.info(f"Audio file downloaded successfully: {save_path}")
        return save_path

    except requests.RequestException as e:
        logging.error(f"Error downloading audio file: {str(e)}")
        raise
    except ValueError as e:
        logging.error(str(e))
        raise
    except Exception as e:
        logging.error(f"Unexpected error downloading audio file: {str(e)}")
        raise
    finally:
        # Do not leave a truncated file behind when the download failed part-way.
        if not downloaded and save_path and os.path.exists(save_path):
            try:
                os.remove(save_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove partial download {save_path}: {cleanup_error}")
93
-
94
-
95
def process_audio(
        audio_file_path,
        num_speakers=2,
        whisper_model="small.en",
        custom_prompt_input=None,
        offset=0,
        api_name=None,
        api_key=None,
        vad_filter=False,
        rolling_summarization=False,
        detail_level=0.01,
        keywords="default,no_keyword_set",
        chunk_text_by_words=False,
        max_words=0,
        chunk_text_by_sentences=False,
        max_sentences=0,
        chunk_text_by_paragraphs=False,
        max_paragraphs=0,
        chunk_text_by_tokens=False,
        max_tokens=0
):
    """Transcribe an audio file, optionally summarize it, save both and store them in the database.

    ``num_speakers``, ``detail_level`` and the ``chunk_text_by_*`` / ``max_*``
    parameters are accepted but not used by this function; they were only
    referenced by the not-yet-implemented rolling summarization call.

    Args:
        audio_file_path: Path handed to ``perform_transcription``.
        whisper_model: Transcription model name; also stored with the database entry.
        custom_prompt_input: Prompt passed to the summarizer and stored with the entry.
        offset: Passed through to ``perform_transcription``.
        api_name: Summarization API; when falsy no summary is requested.
        api_key: Key passed to ``perform_summarization``.
        vad_filter: Passed through to ``perform_transcription``.
        rolling_summarization: Requests rolling summarization, which is not
            implemented; standard summarization is used instead.
        keywords: Keywords stored with the database entry.

    Returns:
        A 6-tuple ``(transcription_text, summary_text, json_file_path,
        summary_file_path, None, None)``. On failure the first element is an
        error message and the remaining five are ``None``.
    """
    try:
        # Perform transcription
        audio_file_path, segments = perform_transcription(audio_file_path, offset, whisper_model, vad_filter)

        if audio_file_path is None or segments is None:
            logging.error("Process_Audio: Transcription failed or segments not available.")
            return "Process_Audio: Transcription failed.", None, None, None, None, None

        logging.debug(f"Process_Audio: Transcription audio_file: {audio_file_path}")
        logging.debug(f"Process_Audio: Transcription segments: {segments}")

        transcription_text = {'audio_file': audio_file_path, 'transcription': segments}
        logging.debug(f"Process_Audio: Transcription text: {transcription_text}")

        # Save segments to JSON
        segments_json_path = save_segments_to_json(segments)

        # Perform summarization
        if api_name:
            # The previous check was `rolling_summarization is not None`, which is
            # true for the default False and therefore skipped summarization entirely.
            if rolling_summarization:
                # FIXME rolling summarization is not implemented yet
                logging.warning("Process_Audio: Rolling summarization is not implemented; "
                                "using standard summarization instead.")
            summary_text = perform_summarization(api_name, segments_json_path, custom_prompt_input, api_key)

            if summary_text is None:
                logging.error("Summary text is None. Check summarization function.")
        else:
            summary_text = 'Summary not available'

        # Save transcription and summary
        download_path = create_download_directory("Audio_Processing")
        json_file_path, summary_file_path = save_transcription_and_summary(transcription_text, summary_text,
                                                                           download_path)

        # Update function call to add_media_to_database so that it properly applies the title, author and file type
        # Add to database
        add_media_to_database(None, {'title': 'Audio File', 'author': 'Unknown'}, segments, summary_text, keywords,
                              custom_prompt_input, whisper_model)

        return transcription_text, summary_text, json_file_path, summary_file_path, None, None

    except Exception as e:
        logging.error(f"Error in process_audio: {str(e)}")
        return str(e), None, None, None, None, None
180
-
181
-
182
def process_single_audio(audio_file_path, whisper_model, api_name, api_key, keep_original, custom_keywords, source,
                         custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
                         use_multi_level_chunking, chunk_language):
    """Transcribe one audio file, optionally summarize it and add it to the database.

    The chunking parameters (``chunk_method`` through ``chunk_language``) are
    accepted but not used by this function.

    Args:
        audio_file_path: Path of the audio file to process.
        whisper_model: Transcription model name.
        api_name: Summarization API name; a summary is only requested when
            both ``api_name`` and ``api_key`` are set.
        api_key: Key for the summarization API.
        keep_original: When false, the file is deleted after processing
            unless ``source`` is ``"Uploaded File"``.
        custom_keywords: Extra comma-separated keywords appended to
            ``"audio,transcription"``.
        source: Value stored as the media URL and used in progress messages.
        custom_prompt_input: Summarization prompt; a default prompt is used
            when it is empty.

    Returns:
        ``(progress_log, transcription, summary)``, all strings. Errors are
        reported through the returned strings rather than raised.
    """
    default_prompt = "Summarize the following audio transcript"
    progress = []
    transcription = ""
    summary = ""

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    try:
        # Check file size before processing
        file_size = os.path.getsize(audio_file_path)
        if file_size > MAX_FILE_SIZE:
            update_progress(f"File size ({file_size / (1024 * 1024):.2f} MB) exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f} MB. Skipping this file.")
            return "\n".join(progress), "", ""

        # Perform transcription
        update_progress("Starting transcription...")
        segments = speech_to_text(audio_file_path, whisper_model=whisper_model)
        transcription = " ".join([segment['Text'] for segment in segments])
        update_progress("Audio transcribed successfully.")

        # The caller's prompt used to be ignored; honour it, and store the prompt actually used.
        prompt = custom_prompt_input or default_prompt

        # Perform summarization if API is provided
        if api_name and api_key:
            update_progress("Starting summarization...")
            summary = perform_summarization(api_name, transcription, prompt, api_key)
            update_progress("Audio summarized successfully.")
        else:
            summary = "No summary available"

        # Prepare keywords
        keywords = "audio,transcription"
        if custom_keywords:
            keywords += f",{custom_keywords}"

        # Add to database
        add_media_with_keywords(
            url=source,
            title=os.path.basename(audio_file_path),
            media_type='audio',
            content=transcription,
            keywords=keywords,
            prompt=prompt,
            summary=summary,
            transcription_model=whisper_model,
            author="Unknown",
            ingestion_date=None  # This will use the current date
        )
        update_progress("Audio file added to database successfully.")

        # Uploaded files are left alone; only downloaded copies are removed or reported.
        if source != "Uploaded File":
            if keep_original:
                update_progress(f"Original audio file kept at: {audio_file_path}")
            else:
                os.remove(audio_file_path)
                update_progress(f"Temporary file {audio_file_path} removed.")

    except Exception as e:
        update_progress(f"Error processing {source}: {str(e)}")
        transcription = f"Error: {str(e)}"
        summary = "No summary due to error"

    return "\n".join(progress), transcription, summary
247
-
248
-
249
def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key, use_cookies, cookies, keep_original,
                        custom_keywords, custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap,
                        use_adaptive_chunking, use_multi_level_chunking, chunk_language, diarize):
    """Download and/or load audio, transcribe it, optionally summarize it and store the results.

    Every URL in ``audio_urls`` (one per line) and the optional uploaded
    ``audio_file`` go through the same pipeline: re-encode to MP3 with ffmpeg,
    convert to WAV, transcribe, chunk, summarize and add to the database.

    Args:
        audio_urls: Newline-separated URLs; ``None`` or empty means no URLs.
        audio_file: Uploaded file object exposing a ``.name`` path, or a falsy value.
        whisper_model: Transcription model name.
        api_name: Summarization API name. URL items are summarized when this
            is set; the uploaded file additionally requires ``api_key``.
        api_key: Key passed to ``perform_summarization``.
        use_cookies: Passed to ``download_audio_file``.
        cookies: Passed to ``download_audio_file``.
        keep_original: When false, downloaded and intermediate files are removed at the end.
        custom_keywords: Keywords stored with each database entry.
        custom_prompt_input: Prompt used for summarization and stored with each entry.
        chunk_method: Chunking option ``method``.
        max_chunk_size: Chunking option ``max_size``.
        chunk_overlap: Chunking option ``overlap``.
        use_adaptive_chunking: Chunking option ``adaptive``.
        use_multi_level_chunking: Chunking option ``multi_level``.
        chunk_language: Chunking option ``language``.
        diarize: When true, ``speech_to_text`` is called with ``diarize=True``.

    Returns:
        ``(progress_log, transcriptions, summaries)``; the last two join the
        per-item results with blank lines. On failure the result is
        ``("...Processing failed: <reason>", "", "")``.
    """
    # Kept as a module-level global, as before, so the ffmpeg helpers below can read it.
    global ffmpeg_cmd

    progress = []
    temp_files = []
    all_transcriptions = []
    all_summaries = []

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    def cleanup_files():
        for file in temp_files:
            try:
                if os.path.exists(file):
                    os.remove(file)
                    update_progress(f"Temporary file {file} removed.")
            except Exception as e:
                update_progress(f"Failed to remove temporary file {file}: {str(e)}")

    def abort(message):
        # Early exit for the uploaded file: drop intermediates unless the caller keeps them.
        if not keep_original:
            cleanup_files()
        return update_progress(message), "", ""

    def reencode_mp3(mp3_file_path):
        try:
            # splitext instead of str.replace: for a non-.mp3 input, replace() returned the
            # input path unchanged and ffmpeg was asked to write onto its own input.
            reencoded_mp3_path = os.path.splitext(mp3_file_path)[0] + "_reencoded.mp3"
            subprocess.run([ffmpeg_cmd, '-i', mp3_file_path, '-codec:a', 'libmp3lame', reencoded_mp3_path], check=True)
            update_progress(f"Re-encoded {mp3_file_path} to {reencoded_mp3_path}.")
            return reencoded_mp3_path
        except subprocess.CalledProcessError as e:
            update_progress(f"Error re-encoding {mp3_file_path}: {str(e)}")
            raise

    def convert_mp3_to_wav(mp3_file_path):
        try:
            wav_file_path = os.path.splitext(mp3_file_path)[0] + ".wav"
            subprocess.run([ffmpeg_cmd, '-i', mp3_file_path, wav_file_path], check=True)
            update_progress(f"Converted {mp3_file_path} to {wav_file_path}.")
            return wav_file_path
        except subprocess.CalledProcessError as e:
            update_progress(f"Error converting {mp3_file_path} to WAV: {str(e)}")
            raise

    def transcribe(wav_file_path):
        # Returns the joined transcript text, or None when the segment format is unexpected.
        if diarize:
            segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=True)
        else:
            segments = speech_to_text(wav_file_path, whisper_model=whisper_model)

        # Handle segments nested under 'segments' key
        if isinstance(segments, dict) and 'segments' in segments:
            segments = segments['segments']

        if not isinstance(segments, list):
            update_progress("Unexpected segments format received from speech_to_text.")
            logging.error(f"Unexpected segments format: {segments}")
            return None
        return " ".join([segment.get('Text', '') for segment in segments])

    def summarize(chunked_text, enabled):
        if not enabled:
            return "No summary available (API not provided)"
        try:
            result = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
            update_progress("Audio summarized successfully.")
            return result
        except Exception as e:
            logging.error(f"Error during summarization: {str(e)}")
            return "Summary generation failed"

    def store(url, wav_file_path, transcription, summary):
        add_media_with_keywords(
            url=url,
            title=os.path.basename(wav_file_path),
            media_type='audio',
            content=transcription,
            keywords=custom_keywords,
            prompt=custom_prompt_input,
            summary=summary,
            transcription_model=whisper_model,
            author="Unknown",
            ingestion_date=datetime.now().strftime('%Y-%m-%d')
        )

    try:
        # Check and set the ffmpeg command
        if os.name == "nt":
            logging.debug("Running on Windows")
            ffmpeg_cmd = os.path.join(os.getcwd(), "Bin", "ffmpeg.exe")
            # Ensure ffmpeg is accessible
            if not os.path.exists(ffmpeg_cmd):
                raise FileNotFoundError(f"ffmpeg executable not found at path: {ffmpeg_cmd}")
        else:
            ffmpeg_cmd = 'ffmpeg'  # Assume 'ffmpeg' is in PATH for non-Windows systems

        # Define chunk options early to avoid undefined errors
        chunk_options = {
            'method': chunk_method,
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'adaptive': use_adaptive_chunking,
            'multi_level': use_multi_level_chunking,
            'language': chunk_language
        }

        # Process multiple URLs (a missing URL box is treated as "no URLs")
        urls = [url.strip() for url in (audio_urls or "").split('\n') if url.strip()]

        for position, url in enumerate(urls, start=1):
            update_progress(f"Processing URL {position}/{len(urls)}: {url}")

            # Download and process audio file
            audio_file_path = download_audio_file(url, use_cookies, cookies)
            if not audio_file_path or not os.path.exists(audio_file_path):
                update_progress(f"Downloaded file not found: {audio_file_path}")
                continue

            temp_files.append(audio_file_path)
            update_progress("Audio file downloaded successfully.")

            # Re-encode MP3 to fix potential issues
            reencoded_mp3_path = reencode_mp3(audio_file_path)
            if not os.path.exists(reencoded_mp3_path):
                update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
                continue

            temp_files.append(reencoded_mp3_path)

            # Convert re-encoded MP3 to WAV
            wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
            if not os.path.exists(wav_file_path):
                update_progress(f"Converted WAV file not found: {wav_file_path}")
                continue

            temp_files.append(wav_file_path)

            # Transcribe audio
            transcription = transcribe(wav_file_path)
            if transcription is None:
                continue
            update_progress("Audio transcribed successfully.")

            if not transcription.strip():
                # Nothing to summarize or store; skipping also keeps `summary`
                # from being unbound or carried over from a previous URL.
                update_progress("Transcription is empty.")
                continue

            # Apply chunking, then summarize
            chunked_text = improved_chunking_process(transcription, chunk_options)
            summary = summarize(chunked_text, bool(api_name))

            all_transcriptions.append(transcription)
            all_summaries.append(summary)

            # Add to database
            store(url, wav_file_path, transcription, summary)
            update_progress("Audio file processed and added to database.")

        # Process uploaded file if provided
        if audio_file:
            if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
                update_progress(
                    f"Uploaded file size exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB. Skipping this file.")
            else:
                # Re-encode MP3 to fix potential issues
                reencoded_mp3_path = reencode_mp3(audio_file.name)
                if not os.path.exists(reencoded_mp3_path):
                    update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
                    return abort("Processing failed: Re-encoded file not found")

                temp_files.append(reencoded_mp3_path)

                # Convert re-encoded MP3 to WAV
                wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
                if not os.path.exists(wav_file_path):
                    update_progress(f"Converted WAV file not found: {wav_file_path}")
                    return abort("Processing failed: Converted WAV file not found")

                temp_files.append(wav_file_path)

                # As before, an unexpected segment format falls through with an empty transcript.
                transcription = transcribe(wav_file_path) or ""

                chunked_text = improved_chunking_process(transcription, chunk_options)
                summary = summarize(chunked_text, bool(api_name and api_key))

                all_transcriptions.append(transcription)
                all_summaries.append(summary)

                store("Uploaded File", wav_file_path, transcription, summary)
                update_progress("Uploaded file processed and added to database.")

        # Final cleanup
        if not keep_original:
            cleanup_files()

        final_progress = update_progress("All processing complete.")
        final_transcriptions = "\n\n".join(all_transcriptions)
        final_summaries = "\n\n".join(all_summaries)

        return final_progress, final_transcriptions, final_summaries

    except Exception as e:
        logging.error(f"Error processing audio files: {str(e)}")
        cleanup_files()
        return update_progress(f"Processing failed: {str(e)}"), "", ""
483
-
484
-
485
def download_youtube_audio(url: str) -> str:
    """Download the best available audio stream for ``url`` and return the WAV path.

    yt-dlp saves the media as ``<title>.<ext>`` in the current directory and
    its ``FFmpegExtractAudio`` post-processor is configured for WAV output, so
    the returned path is the prepared filename with its extension replaced
    by ``.wav``.
    """
    extract_audio = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [extract_audio],
        'outtmpl': '%(title)s.%(ext)s',
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        media_info = downloader.extract_info(url, download=True)
        downloaded_name = downloader.prepare_filename(media_info)

    # Drop the source extension; the post-processor has written "<stem>.wav".
    stem = downloaded_name.rsplit('.', 1)[0]
    return stem + '.wav'
499
-
500
-
501
def process_podcast(url, title, author, keywords, custom_prompt, api_name, api_key, whisper_model,
                    keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
                    chunk_method=None, max_chunk_size=300, chunk_overlap=0, use_adaptive_chunking=False,
                    use_multi_level_chunking=False, chunk_language='english'):
    """Download a podcast episode, transcribe it, optionally summarize it and store it.

    Args:
        url: URL of the podcast audio.
        title: Title override; falls back to the extracted metadata.
        author: Author override; falls back to the metadata ``uploader``.
        keywords: Comma-separated keywords; series/episode/season tags from
            the metadata are appended.
        custom_prompt: Prompt used for summarization and stored with the entry.
        api_name: Summarization API name; a summary is only requested when
            both ``api_name`` and ``api_key`` are set.
        api_key: Key for the summarization API.
        whisper_model: Transcription model name.
        keep_original: When false, the downloaded file is removed afterwards.
        enable_diarization: When true, ``speech_to_text`` is called with ``diarize=True``.
        use_cookies: Passed to ``download_audio_file``.
        cookies: Passed to ``download_audio_file``.
        chunk_method: Chunking option ``method``.
        max_chunk_size: Chunking option ``max_size``.
        chunk_overlap: Chunking option ``overlap``.
        use_adaptive_chunking: Chunking option ``adaptive``.
        use_multi_level_chunking: Chunking option ``multi_level``.
        chunk_language: Chunking option ``language``.

    Returns:
        A 7-tuple ``(progress_log, full_content, summary, title, author,
        keywords, error_message)``. On failure the middle five elements are
        empty strings and ``error_message`` names the stage that failed when
        that is known.
    """
    progress = []
    error_message = ""
    temp_files = []

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    def cleanup_files():
        if not keep_original:
            for file in temp_files:
                try:
                    if os.path.exists(file):
                        os.remove(file)
                        update_progress(f"Temporary file {file} removed.")
                except Exception as e:
                    update_progress(f"Failed to remove temporary file {file}: {str(e)}")

    try:
        # Download podcast
        audio_file = download_audio_file(url, use_cookies, cookies)
        temp_files.append(audio_file)
        update_progress("Podcast downloaded successfully.")

        # Extract metadata; tolerate an extractor that yields nothing
        metadata = extract_metadata(url) or {}
        title = title or metadata.get('title', 'Unknown Podcast')
        author = author or metadata.get('uploader', 'Unknown Author')

        # Format metadata for storage
        metadata_text = f"""
Metadata:
Title: {title}
Author: {author}
Series: {metadata.get('series', 'N/A')}
Episode: {metadata.get('episode', 'N/A')}
Season: {metadata.get('season', 'N/A')}
Upload Date: {metadata.get('upload_date', 'N/A')}
Duration: {metadata.get('duration', 'N/A')} seconds
Description: {metadata.get('description', 'N/A')}
"""

        # Update keywords
        new_keywords = []
        if metadata.get('series'):
            new_keywords.append(f"series:{metadata['series']}")
        if metadata.get('episode'):
            new_keywords.append(f"episode:{metadata['episode']}")
        if metadata.get('season'):
            new_keywords.append(f"season:{metadata['season']}")

        # Join only non-empty parts so no trailing comma appears when the metadata adds nothing.
        keywords = ','.join(part for part in [keywords, *new_keywords] if part)

        update_progress(f"Metadata extracted - Title: {title}, Author: {author}, Keywords: {keywords}")

        # Transcribe the podcast
        try:
            if enable_diarization:
                segments = speech_to_text(audio_file, whisper_model=whisper_model, diarize=True)
            else:
                segments = speech_to_text(audio_file, whisper_model=whisper_model)
            transcription = " ".join([segment['Text'] for segment in segments])
            update_progress("Podcast transcribed successfully.")
        except Exception as e:
            error_message = f"Transcription failed: {str(e)}"
            raise

        # Apply chunking
        chunk_options = {
            'method': chunk_method,
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'adaptive': use_adaptive_chunking,
            'multi_level': use_multi_level_chunking,
            'language': chunk_language
        }
        chunked_text = improved_chunking_process(transcription, chunk_options)

        # Combine metadata and transcription
        full_content = metadata_text + "\n\nTranscription:\n" + transcription

        # Summarize if API is provided
        summary = None
        if api_name and api_key:
            try:
                summary = perform_summarization(api_name, chunked_text, custom_prompt, api_key)
                update_progress("Podcast summarized successfully.")
            except Exception as e:
                error_message = f"Summarization failed: {str(e)}"
                raise

        # Add to database
        try:
            add_media_with_keywords(
                url=url,
                title=title,
                media_type='podcast',
                content=full_content,
                keywords=keywords,
                prompt=custom_prompt,
                summary=summary or "No summary available",
                transcription_model=whisper_model,
                author=author,
                ingestion_date=datetime.now().strftime('%Y-%m-%d')
            )
            update_progress("Podcast added to database successfully.")
        except Exception as e:
            error_message = f"Error adding podcast to database: {str(e)}"
            raise

        # Cleanup
        cleanup_files()

        return (update_progress("Processing complete."), full_content, summary or "No summary generated.",
                title, author, keywords, error_message)

    except Exception as e:
        logging.error(f"Error processing podcast: {str(e)}")
        cleanup_files()
        # Prefer the stage-specific message set above; it used to be discarded here.
        return update_progress(f"Processing failed: {str(e)}"), "", "", "", "", "", error_message or str(e)
625
-
626
-
627
- #
628
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
  #######################################################################################################################
 
1
+ # Audio_Files.py
2
+ #########################################
3
+ # Audio Processing Library
4
+ # This library is used to download or load audio files from a local directory.
5
+ #
6
+ ####
7
+ #
8
+ # Functions:
9
+ #
10
+ # download_audio_file(url, current_whisper_model="", use_cookies=False, cookies=None)
10
+ # process_audio(audio_file_path, num_speakers=2, whisper_model="small.en", ...)
11
+ # process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key, ...)
13
+ #
14
+ #
15
+ #########################################
16
+ # Imports
17
+ import json
18
+ import logging
19
+ import os
20
+ import subprocess
21
+ import tempfile
22
+ import uuid
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+
26
+ import requests
27
+ import yt_dlp
28
+
29
+ from App_Function_Libraries.Audio_Transcription_Lib import speech_to_text
30
+ from App_Function_Libraries.Chunk_Lib import improved_chunking_process
31
+ #
32
+ # Local Imports
33
+ from App_Function_Libraries.SQLite_DB import add_media_to_database, add_media_with_keywords, \
34
+ check_media_and_whisper_model
35
+ from App_Function_Libraries.Summarization_General_Lib import save_transcription_and_summary, perform_transcription, \
36
+ perform_summarization
37
+ from App_Function_Libraries.Utils import create_download_directory, save_segments_to_json, downloaded_files, \
38
+ sanitize_filename
39
+ from App_Function_Libraries.Video_DL_Ingestion_Lib import extract_metadata
40
+
41
+ #
42
+ #######################################################################################################################
43
+ # Function Definitions
44
+ #
45
+
46
+ MAX_FILE_SIZE = 500 * 1024 * 1024
47
+
48
+
49
def download_audio_file(url, current_whisper_model="", use_cookies=False, cookies=None):
    """Download an audio file from ``url`` unless the database says it can be skipped.

    Args:
        url: Direct URL of the audio file.
        current_whisper_model: Model name passed to
            ``check_media_and_whisper_model`` together with ``url``.
        use_cookies: When true and ``cookies`` is non-empty, the cookies are
            sent with the request as a ``Cookie`` header.
        cookies: JSON object (as a string) mapping cookie names to values.
            Anything that cannot be parsed into a mapping is ignored with a warning.

    Returns:
        Path of the saved file, ``downloads/audio_<8 hex chars>.mp3``, or
        ``None`` when ``check_media_and_whisper_model`` reports that no
        download is needed.

    Raises:
        requests.RequestException: On connection errors, timeouts or a bad HTTP status.
        ValueError: If the file is larger than ``MAX_FILE_SIZE``.
    """
    size_error = f"File size exceeds the {MAX_FILE_SIZE // (1024 * 1024)}MB limit."
    save_path = None
    downloaded = False
    try:
        # Check if media already exists in the database and compare whisper models
        should_download, reason = check_media_and_whisper_model(
            url=url,
            current_whisper_model=current_whisper_model
        )

        if not should_download:
            logging.info(f"Skipping audio download: {reason}")
            return None

        logging.info(f"Proceeding with audio download: {reason}")

        # Set up the request headers
        headers = {}
        if use_cookies and cookies:
            try:
                cookie_dict = json.loads(cookies)
                headers['Cookie'] = '; '.join(f'{k}={v}' for k, v in cookie_dict.items())
            except (json.JSONDecodeError, AttributeError, TypeError):
                # AttributeError/TypeError: valid JSON that is not an object, or a non-string value.
                logging.warning("Invalid cookie format. Proceeding without cookies.")

        # A timeout keeps a stalled server from hanging the caller forever;
        # the context manager releases the connection on every exit path.
        with requests.get(url, headers=headers, stream=True, timeout=(10, 60)) as response:
            # Raise an exception for bad status codes
            response.raise_for_status()

            # Reject early when the server announces an oversized body
            declared_size = int(response.headers.get('content-length', 0))
            if declared_size > MAX_FILE_SIZE:
                raise ValueError(size_error)

            # Generate a unique filename inside the downloads directory
            os.makedirs('downloads', exist_ok=True)
            save_path = os.path.join('downloads', f"audio_{uuid.uuid4().hex[:8]}.mp3")

            bytes_written = 0
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    # Content-Length may be absent or wrong, so enforce the limit on real bytes too.
                    bytes_written += len(chunk)
                    if bytes_written > MAX_FILE_SIZE:
                        raise ValueError(size_error)
                    f.write(chunk)

        downloaded = True
        logging.info(f"Audio file downloaded successfully: {save_path}")
        return save_path

    except requests.RequestException as e:
        logging.error(f"Error downloading audio file: {str(e)}")
        raise
    except ValueError as e:
        logging.error(str(e))
        raise
    except Exception as e:
        logging.error(f"Unexpected error downloading audio file: {str(e)}")
        raise
    finally:
        # Do not leave a truncated file behind when the download failed part-way.
        if not downloaded and save_path and os.path.exists(save_path):
            try:
                os.remove(save_path)
            except OSError as cleanup_error:
                logging.warning(f"Could not remove partial download {save_path}: {cleanup_error}")
108
+
109
+
110
def process_audio(
        audio_file_path,
        num_speakers=2,
        whisper_model="small.en",
        custom_prompt_input=None,
        offset=0,
        api_name=None,
        api_key=None,
        vad_filter=False,
        rolling_summarization=False,
        detail_level=0.01,
        keywords="default,no_keyword_set",
        chunk_text_by_words=False,
        max_words=0,
        chunk_text_by_sentences=False,
        max_sentences=0,
        chunk_text_by_paragraphs=False,
        max_paragraphs=0,
        chunk_text_by_tokens=False,
        max_tokens=0
):
    """Transcribe an audio file, optionally summarize it, save both and store them in the database.

    ``num_speakers``, ``detail_level`` and the ``chunk_text_by_*`` / ``max_*``
    parameters are accepted but not used by this function; they were only
    referenced by the not-yet-implemented rolling summarization call.

    Args:
        audio_file_path: Path handed to ``perform_transcription``.
        whisper_model: Transcription model name; also stored with the database entry.
        custom_prompt_input: Prompt passed to the summarizer and stored with the entry.
        offset: Passed through to ``perform_transcription``.
        api_name: Summarization API; when falsy no summary is requested.
        api_key: Key passed to ``perform_summarization``.
        vad_filter: Passed through to ``perform_transcription``.
        rolling_summarization: Requests rolling summarization, which is not
            implemented; standard summarization is used instead.
        keywords: Keywords stored with the database entry.

    Returns:
        A 6-tuple ``(transcription_text, summary_text, json_file_path,
        summary_file_path, None, None)``. On failure the first element is an
        error message and the remaining five are ``None``.
    """
    try:
        # Perform transcription
        audio_file_path, segments = perform_transcription(audio_file_path, offset, whisper_model, vad_filter)

        if audio_file_path is None or segments is None:
            logging.error("Process_Audio: Transcription failed or segments not available.")
            return "Process_Audio: Transcription failed.", None, None, None, None, None

        logging.debug(f"Process_Audio: Transcription audio_file: {audio_file_path}")
        logging.debug(f"Process_Audio: Transcription segments: {segments}")

        transcription_text = {'audio_file': audio_file_path, 'transcription': segments}
        logging.debug(f"Process_Audio: Transcription text: {transcription_text}")

        # Save segments to JSON
        segments_json_path = save_segments_to_json(segments)

        # Perform summarization
        if api_name:
            # The previous check was `rolling_summarization is not None`, which is
            # true for the default False and therefore skipped summarization entirely.
            if rolling_summarization:
                # FIXME rolling summarization is not implemented yet
                logging.warning("Process_Audio: Rolling summarization is not implemented; "
                                "using standard summarization instead.")
            summary_text = perform_summarization(api_name, segments_json_path, custom_prompt_input, api_key)

            if summary_text is None:
                logging.error("Summary text is None. Check summarization function.")
        else:
            summary_text = 'Summary not available'

        # Save transcription and summary
        download_path = create_download_directory("Audio_Processing")
        json_file_path, summary_file_path = save_transcription_and_summary(transcription_text, summary_text,
                                                                           download_path)

        # Update function call to add_media_to_database so that it properly applies the title, author and file type
        # Add to database
        add_media_to_database(None, {'title': 'Audio File', 'author': 'Unknown'}, segments, summary_text, keywords,
                              custom_prompt_input, whisper_model)

        return transcription_text, summary_text, json_file_path, summary_file_path, None, None

    except Exception as e:
        logging.error(f"Error in process_audio: {str(e)}")
        return str(e), None, None, None, None, None
195
+
196
+
197
def process_single_audio(audio_file_path, whisper_model, api_name, api_key, keep_original, custom_keywords, source,
                         custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
                         use_multi_level_chunking, chunk_language):
    """Transcribe one audio file, optionally summarize it and add it to the database.

    The chunking parameters (``chunk_method`` through ``chunk_language``) are
    accepted but not used by this function.

    Args:
        audio_file_path: Path of the audio file to process.
        whisper_model: Transcription model name.
        api_name: Summarization API name; a summary is only requested when
            both ``api_name`` and ``api_key`` are set.
        api_key: Key for the summarization API.
        keep_original: When false, the file is deleted after processing
            unless ``source`` is ``"Uploaded File"``.
        custom_keywords: Extra comma-separated keywords appended to
            ``"audio,transcription"``.
        source: Value stored as the media URL and used in progress messages.
        custom_prompt_input: Summarization prompt; a default prompt is used
            when it is empty.

    Returns:
        ``(progress_log, transcription, summary)``, all strings. Errors are
        reported through the returned strings rather than raised.
    """
    default_prompt = "Summarize the following audio transcript"
    progress = []
    transcription = ""
    summary = ""

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    try:
        # Check file size before processing
        file_size = os.path.getsize(audio_file_path)
        if file_size > MAX_FILE_SIZE:
            update_progress(f"File size ({file_size / (1024 * 1024):.2f} MB) exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f} MB. Skipping this file.")
            return "\n".join(progress), "", ""

        # Perform transcription
        update_progress("Starting transcription...")
        segments = speech_to_text(audio_file_path, whisper_model=whisper_model)
        transcription = " ".join([segment['Text'] for segment in segments])
        update_progress("Audio transcribed successfully.")

        # The caller's prompt used to be ignored; honour it, and store the prompt actually used.
        prompt = custom_prompt_input or default_prompt

        # Perform summarization if API is provided
        if api_name and api_key:
            update_progress("Starting summarization...")
            summary = perform_summarization(api_name, transcription, prompt, api_key)
            update_progress("Audio summarized successfully.")
        else:
            summary = "No summary available"

        # Prepare keywords
        keywords = "audio,transcription"
        if custom_keywords:
            keywords += f",{custom_keywords}"

        # Add to database
        add_media_with_keywords(
            url=source,
            title=os.path.basename(audio_file_path),
            media_type='audio',
            content=transcription,
            keywords=keywords,
            prompt=prompt,
            summary=summary,
            transcription_model=whisper_model,
            author="Unknown",
            ingestion_date=None  # This will use the current date
        )
        update_progress("Audio file added to database successfully.")

        # Uploaded files are left alone; only downloaded copies are removed or reported.
        if source != "Uploaded File":
            if keep_original:
                update_progress(f"Original audio file kept at: {audio_file_path}")
            else:
                os.remove(audio_file_path)
                update_progress(f"Temporary file {audio_file_path} removed.")

    except Exception as e:
        update_progress(f"Error processing {source}: {str(e)}")
        transcription = f"Error: {str(e)}"
        summary = "No summary due to error"

    return "\n".join(progress), transcription, summary
262
+
263
+
264
def process_audio_files(audio_urls, audio_file, whisper_model, api_name, api_key, use_cookies, cookies, keep_original,
                        custom_keywords, custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap,
                        use_adaptive_chunking, use_multi_level_chunking, chunk_language, diarize):
    """Download/convert, transcribe, summarize and store a batch of audio inputs.

    Each URL in ``audio_urls`` (one per line) is downloaded, re-encoded to MP3,
    converted to WAV, transcribed, chunked, optionally summarized and written
    to the database. An uploaded ``audio_file`` goes through the same
    pipeline afterwards.

    Args:
        audio_urls: Newline-separated URLs; ``None`` or empty means no URLs.
        audio_file: Optional uploaded file object; only its ``.name`` (a path)
            is read.
        whisper_model: Model name passed to ``speech_to_text``.
        api_name: Summarization backend name.
        api_key: Credential passed to ``perform_summarization``.
        use_cookies, cookies: Forwarded to ``download_audio_file``.
        keep_original: When false, downloaded and derived files are removed
            after a successful run. On failure they are always removed.
        custom_keywords: Keywords stored with each database entry.
        custom_prompt_input: Prompt used for summarization and stored in the DB.
        chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
        use_multi_level_chunking, chunk_language: Collected into the options
            dict passed to ``improved_chunking_process``.
        diarize: When true, ``speech_to_text`` is called with ``diarize=True``.

    Returns:
        A ``(progress_log, transcriptions, summaries)`` tuple of strings, the
        latter two joined with blank lines. On failure the log ends with
        ``"Processing failed: ..."`` and the other two elements are ``""``.
    """
    # The nested ffmpeg helpers resolve this name at module level, so it is
    # deliberately (re)bound as a global here, as in the original code.
    global ffmpeg_cmd

    progress = []
    temp_files = []
    all_transcriptions = []
    all_summaries = []

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    def cleanup_files():
        for file in temp_files:
            try:
                if os.path.exists(file):
                    os.remove(file)
                    update_progress(f"Temporary file {file} removed.")
            except Exception as e:
                update_progress(f"Failed to remove temporary file {file}: {str(e)}")

    def reencode_mp3(mp3_file_path):
        try:
            # Derive the output name from the extension only. str.replace(".mp3", ...)
            # returned the input path unchanged for non-.mp3 inputs (so ffmpeg was
            # asked to overwrite its own input) and also rewrote ".mp3" occurring
            # inside directory names.
            base_path, _ = os.path.splitext(mp3_file_path)
            reencoded_mp3_path = f"{base_path}_reencoded.mp3"
            # '-y': overwrite a stale output from an earlier run instead of
            # prompting on stdin or exiting with an error.
            subprocess.run([ffmpeg_cmd, '-y', '-i', mp3_file_path, '-codec:a', 'libmp3lame', reencoded_mp3_path],
                           check=True)
            update_progress(f"Re-encoded {mp3_file_path} to {reencoded_mp3_path}.")
            return reencoded_mp3_path
        except subprocess.CalledProcessError as e:
            update_progress(f"Error re-encoding {mp3_file_path}: {str(e)}")
            raise

    def convert_mp3_to_wav(mp3_file_path):
        try:
            base_path, _ = os.path.splitext(mp3_file_path)
            wav_file_path = f"{base_path}.wav"
            subprocess.run([ffmpeg_cmd, '-y', '-i', mp3_file_path, wav_file_path], check=True)
            update_progress(f"Converted {mp3_file_path} to {wav_file_path}.")
            return wav_file_path
        except subprocess.CalledProcessError as e:
            update_progress(f"Error converting {mp3_file_path} to WAV: {str(e)}")
            raise

    def transcribe(wav_path):
        # Run speech-to-text and unwrap a result nested under a 'segments' key.
        if diarize:
            result = speech_to_text(wav_path, whisper_model=whisper_model, diarize=True)
        else:
            result = speech_to_text(wav_path, whisper_model=whisper_model)
        if isinstance(result, dict) and 'segments' in result:
            result = result['segments']
        return result

    try:
        # Check and set the ffmpeg command
        if os.name == "nt":
            logging.debug("Running on Windows")
            ffmpeg_cmd = os.path.join(os.getcwd(), "Bin", "ffmpeg.exe")
        else:
            ffmpeg_cmd = 'ffmpeg'  # Assume 'ffmpeg' is in PATH for non-Windows systems

        # Ensure ffmpeg is accessible
        if not os.path.exists(ffmpeg_cmd) and os.name == "nt":
            raise FileNotFoundError(f"ffmpeg executable not found at path: {ffmpeg_cmd}")

        # Define chunk options early to avoid undefined errors
        chunk_options = {
            'method': chunk_method,
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'adaptive': use_adaptive_chunking,
            'multi_level': use_multi_level_chunking,
            'language': chunk_language
        }

        # Process multiple URLs. A missing URL field (None) is treated as
        # "no URLs" so an upload-only call does not fail on .split().
        urls = [url.strip() for url in (audio_urls or "").split('\n') if url.strip()]

        for i, url in enumerate(urls):
            update_progress(f"Processing URL {i + 1}/{len(urls)}: {url}")

            # Download and process audio file
            audio_file_path = download_audio_file(url, use_cookies, cookies)
            if not os.path.exists(audio_file_path):
                update_progress(f"Downloaded file not found: {audio_file_path}")
                continue

            temp_files.append(audio_file_path)
            update_progress("Audio file downloaded successfully.")

            # Re-encode MP3 to fix potential issues
            reencoded_mp3_path = reencode_mp3(audio_file_path)
            if not os.path.exists(reencoded_mp3_path):
                update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
                continue

            temp_files.append(reencoded_mp3_path)

            # Convert re-encoded MP3 to WAV
            wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
            if not os.path.exists(wav_file_path):
                update_progress(f"Converted WAV file not found: {wav_file_path}")
                continue

            temp_files.append(wav_file_path)

            # Transcribe audio
            segments = transcribe(wav_file_path)

            if isinstance(segments, list):
                transcription = " ".join([segment.get('Text', '') for segment in segments])
                update_progress("Audio transcribed successfully.")
            else:
                update_progress("Unexpected segments format received from speech_to_text.")
                logging.error(f"Unexpected segments format: {segments}")
                continue

            if not transcription.strip():
                update_progress("Transcription is empty.")
            else:
                # Apply chunking
                chunked_text = improved_chunking_process(transcription, chunk_options)

                # Summarize
                if api_name:
                    try:
                        summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
                        update_progress("Audio summarized successfully.")
                    except Exception as e:
                        logging.error(f"Error during summarization: {str(e)}")
                        summary = "Summary generation failed"
                else:
                    summary = "No summary available (API not provided)"

                all_transcriptions.append(transcription)
                all_summaries.append(summary)

                # Add to database
                add_media_with_keywords(
                    url=url,
                    title=os.path.basename(wav_file_path),
                    media_type='audio',
                    content=transcription,
                    keywords=custom_keywords,
                    prompt=custom_prompt_input,
                    summary=summary,
                    transcription_model=whisper_model,
                    author="Unknown",
                    ingestion_date=datetime.now().strftime('%Y-%m-%d')
                )
                update_progress("Audio file processed and added to database.")

        # Process uploaded file if provided
        if audio_file:
            if os.path.getsize(audio_file.name) > MAX_FILE_SIZE:
                update_progress(
                    f"Uploaded file size exceeds the maximum limit of {MAX_FILE_SIZE / (1024 * 1024):.2f}MB. Skipping this file.")
            else:
                # Re-encode MP3 to fix potential issues
                reencoded_mp3_path = reencode_mp3(audio_file.name)
                if not os.path.exists(reencoded_mp3_path):
                    update_progress(f"Re-encoded file not found: {reencoded_mp3_path}")
                    return update_progress("Processing failed: Re-encoded file not found"), "", ""

                temp_files.append(reencoded_mp3_path)

                # Convert re-encoded MP3 to WAV
                wav_file_path = convert_mp3_to_wav(reencoded_mp3_path)
                if not os.path.exists(wav_file_path):
                    update_progress(f"Converted WAV file not found: {wav_file_path}")
                    return update_progress("Processing failed: Converted WAV file not found"), "", ""

                temp_files.append(wav_file_path)

                # An unexpected result is logged but, unlike the URL path, the
                # upload is still stored with an empty transcription.
                transcription = ""
                segments = transcribe(wav_file_path)

                if isinstance(segments, list):
                    transcription = " ".join([segment.get('Text', '') for segment in segments])
                else:
                    update_progress("Unexpected segments format received from speech_to_text.")
                    logging.error(f"Unexpected segments format: {segments}")

                chunked_text = improved_chunking_process(transcription, chunk_options)

                if api_name and api_key:
                    try:
                        summary = perform_summarization(api_name, chunked_text, custom_prompt_input, api_key)
                        update_progress("Audio summarized successfully.")
                    except Exception as e:
                        logging.error(f"Error during summarization: {str(e)}")
                        summary = "Summary generation failed"
                else:
                    summary = "No summary available (API not provided)"

                all_transcriptions.append(transcription)
                all_summaries.append(summary)

                add_media_with_keywords(
                    url="Uploaded File",
                    title=os.path.basename(wav_file_path),
                    media_type='audio',
                    content=transcription,
                    keywords=custom_keywords,
                    prompt=custom_prompt_input,
                    summary=summary,
                    transcription_model=whisper_model,
                    author="Unknown",
                    ingestion_date=datetime.now().strftime('%Y-%m-%d')
                )
                update_progress("Uploaded file processed and added to database.")

        # Final cleanup
        if not keep_original:
            cleanup_files()

        final_progress = update_progress("All processing complete.")
        final_transcriptions = "\n\n".join(all_transcriptions)
        final_summaries = "\n\n".join(all_summaries)

        return final_progress, final_transcriptions, final_summaries

    except Exception as e:
        logging.error(f"Error processing audio files: {str(e)}")
        cleanup_files()
        return update_progress(f"Processing failed: {str(e)}"), "", ""
498
+
499
+
500
def download_youtube_audio(url):
    """Download a video's audio track with yt-dlp and save it as an MP3 in ``downloads/``.

    The media is fetched into a temporary directory, converted to a 192k MP3
    with ffmpeg, then moved into a ``downloads`` directory relative to the
    current working directory. The resulting path is appended to the
    module-level ``downloaded_files`` list.

    Args:
        url: URL of the video to download.

    Returns:
        ``(path, message)``: the MP3 path as a string and a success message,
        or ``(None, "Error downloading audio: ...")`` on any failure.
        Exceptions are logged, not raised.
    """
    # shutil.move copies and deletes when source and destination are on
    # different filesystems; os.replace cannot move across filesystems.
    import shutil

    try:
        # Determine ffmpeg path based on the operating system.
        ffmpeg_path = './Bin/ffmpeg.exe' if os.name == 'nt' else 'ffmpeg'

        # Create a temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract information about the video
            with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
                info_dict = ydl.extract_info(url, download=False)
                sanitized_title = sanitize_filename(info_dict['title'])

            # Setup the temporary filenames
            temp_video_path = Path(temp_dir) / f"{sanitized_title}_temp.mp4"
            temp_audio_path = Path(temp_dir) / f"{sanitized_title}.mp3"

            # Initialize yt-dlp with options for downloading
            ydl_opts = {
                'format': 'bestaudio[ext=m4a]/best[height<=480]',  # Prefer best audio, or video up to 480p
                'ffmpeg_location': ffmpeg_path,
                'outtmpl': str(temp_video_path),
                'noplaylist': True,
                'quiet': True
            }

            # Execute yt-dlp to download the video/audio
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # Check if the file exists
            if not temp_video_path.exists():
                raise FileNotFoundError(f"Expected file was not found: {temp_video_path}")

            # Use ffmpeg to extract audio
            ffmpeg_command = [
                ffmpeg_path,
                '-i', str(temp_video_path),
                '-vn',  # No video
                '-acodec', 'libmp3lame',
                '-b:a', '192k',
                str(temp_audio_path)
            ]
            subprocess.run(ffmpeg_command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

            # Check if the audio file was created
            if not temp_audio_path.exists():
                raise FileNotFoundError(f"Expected audio file was not found: {temp_audio_path}")

            # Create a persistent directory for the download if it doesn't exist
            persistent_dir = Path("downloads")
            persistent_dir.mkdir(exist_ok=True)

            # Move the file out of the temporary directory before it is deleted.
            persistent_file_path = persistent_dir / f"{sanitized_title}.mp3"
            shutil.move(str(temp_audio_path), str(persistent_file_path))

            # Add the file to the list of downloaded files
            downloaded_files.append(str(persistent_file_path))

            return str(persistent_file_path), f"Audio downloaded successfully: {sanitized_title}.mp3"
    except Exception as e:
        # Keep the best-effort contract, but leave a trace in the log.
        logging.error(f"Error downloading audio: {str(e)}")
        return None, f"Error downloading audio: {str(e)}"
562
+
563
+
564
def process_podcast(url, title, author, keywords, custom_prompt, api_name, api_key, whisper_model,
                    keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
                    chunk_method=None, max_chunk_size=300, chunk_overlap=0, use_adaptive_chunking=False,
                    use_multi_level_chunking=False, chunk_language='english'):
    """Download, transcribe, optionally summarize and store a podcast episode.

    Args:
        url: Podcast audio URL.
        title: Title override; falls back to the extracted metadata title.
        author: Author override; falls back to the metadata ``uploader``.
        keywords: Comma-separated keywords; ``series:``/``episode:``/``season:``
            keywords from the metadata are appended.
        custom_prompt: Prompt used for summarization and stored in the DB.
        api_name, api_key: Summarization runs only when both are truthy.
        whisper_model: Model name passed to ``speech_to_text``.
        keep_original: When false, the downloaded file is removed afterwards.
        enable_diarization: When true, ``speech_to_text`` gets ``diarize=True``.
        use_cookies, cookies: Forwarded to ``download_audio_file``.
        chunk_method, max_chunk_size, chunk_overlap, use_adaptive_chunking,
        use_multi_level_chunking, chunk_language: Options passed to
            ``improved_chunking_process``.

    Returns:
        A 7-tuple ``(progress_log, full_content, summary, title, author,
        keywords, error_message)``. On failure, elements 2-6 are ``""`` and
        ``error_message`` names the failing stage when known (e.g.
        ``"Transcription failed: ..."``), otherwise it is the exception text.
    """
    progress = []
    error_message = ""
    temp_files = []

    def update_progress(message):
        progress.append(message)
        return "\n".join(progress)

    def cleanup_files():
        if not keep_original:
            for file in temp_files:
                try:
                    if os.path.exists(file):
                        os.remove(file)
                        update_progress(f"Temporary file {file} removed.")
                except Exception as e:
                    update_progress(f"Failed to remove temporary file {file}: {str(e)}")

    try:
        # Download podcast
        audio_file = download_audio_file(url, use_cookies, cookies)
        temp_files.append(audio_file)
        update_progress("Podcast downloaded successfully.")

        # Extract metadata
        # NOTE(review): guards against extract_metadata returning None -
        # confirm its failure contract in Video_DL_Ingestion_Lib.
        metadata = extract_metadata(url) or {}
        title = title or metadata.get('title', 'Unknown Podcast')
        author = author or metadata.get('uploader', 'Unknown Author')

        # Format metadata for storage
        metadata_text = f"""
Metadata:
Title: {title}
Author: {author}
Series: {metadata.get('series', 'N/A')}
Episode: {metadata.get('episode', 'N/A')}
Season: {metadata.get('season', 'N/A')}
Upload Date: {metadata.get('upload_date', 'N/A')}
Duration: {metadata.get('duration', 'N/A')} seconds
Description: {metadata.get('description', 'N/A')}
"""

        # Update keywords
        new_keywords = []
        if metadata.get('series'):
            new_keywords.append(f"series:{metadata['series']}")
        if metadata.get('episode'):
            new_keywords.append(f"episode:{metadata['episode']}")
        if metadata.get('season'):
            new_keywords.append(f"season:{metadata['season']}")

        # Join only the non-empty parts so that user keywords with no metadata
        # keywords do not end in a dangling comma (an empty keyword).
        keywords = ','.join(part for part in [keywords, *new_keywords] if part)

        update_progress(f"Metadata extracted - Title: {title}, Author: {author}, Keywords: {keywords}")

        # Transcribe the podcast
        try:
            if enable_diarization:
                segments = speech_to_text(audio_file, whisper_model=whisper_model, diarize=True)
            else:
                segments = speech_to_text(audio_file, whisper_model=whisper_model)
            # Same unwrapping as process_audio_files: the segment list may be
            # nested under a 'segments' key.
            if isinstance(segments, dict) and 'segments' in segments:
                segments = segments['segments']
            transcription = " ".join([segment['Text'] for segment in segments])
            update_progress("Podcast transcribed successfully.")
        except Exception as e:
            error_message = f"Transcription failed: {str(e)}"
            raise

        # Apply chunking
        chunk_options = {
            'method': chunk_method,
            'max_size': max_chunk_size,
            'overlap': chunk_overlap,
            'adaptive': use_adaptive_chunking,
            'multi_level': use_multi_level_chunking,
            'language': chunk_language
        }
        chunked_text = improved_chunking_process(transcription, chunk_options)

        # Combine metadata and transcription
        full_content = metadata_text + "\n\nTranscription:\n" + transcription

        # Summarize if API is provided
        summary = None
        if api_name and api_key:
            try:
                summary = perform_summarization(api_name, chunked_text, custom_prompt, api_key)
                update_progress("Podcast summarized successfully.")
            except Exception as e:
                error_message = f"Summarization failed: {str(e)}"
                raise

        # Add to database
        try:
            add_media_with_keywords(
                url=url,
                title=title,
                media_type='podcast',
                content=full_content,
                keywords=keywords,
                prompt=custom_prompt,
                summary=summary or "No summary available",
                transcription_model=whisper_model,
                author=author,
                ingestion_date=datetime.now().strftime('%Y-%m-%d')
            )
            update_progress("Podcast added to database successfully.")
        except Exception as e:
            error_message = f"Error adding podcast to database: {str(e)}"
            raise

        # Cleanup
        cleanup_files()

        return (update_progress("Processing complete."), full_content, summary or "No summary generated.",
                title, author, keywords, error_message)

    except Exception as e:
        logging.error(f"Error processing podcast: {str(e)}")
        cleanup_files()
        # Prefer the stage-specific message set before the re-raise; it was
        # previously computed and then thrown away.
        return update_progress(f"Processing failed: {str(e)}"), "", "", "", "", "", error_message or str(e)
688
+
689
+
690
+ #
691
+ #
692
  #######################################################################################################################