oceansweep committed on
Commit
7e8c144
1 Parent(s): 49ba24f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +931 -930
app.py CHANGED
@@ -1,930 +1,931 @@
1
- #!/usr/bin/env python3
2
- # Std Lib Imports
3
- import argparse
4
- import atexit
5
- import json
6
- import logging
7
- import os
8
- import signal
9
- import sys
10
- import time
11
- import webbrowser
12
- #
13
- # Local Library Imports
14
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'App_Function_Libraries')))
15
- from App_Function_Libraries.Book_Ingestion_Lib import ingest_folder, ingest_text_file
16
- from App_Function_Libraries.Chunk_Lib import semantic_chunk_long_file#, rolling_summarize_function,
17
- from App_Function_Libraries.Gradio_Related import launch_ui
18
- from App_Function_Libraries.Local_LLM_Inference_Engine_Lib import cleanup_process, local_llm_function
19
- from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
20
- summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
21
- from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
22
- summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
23
- summarize_with_huggingface, perform_transcription, perform_summarization
24
- from App_Function_Libraries.Audio_Transcription_Lib import convert_to_wav, speech_to_text
25
- from App_Function_Libraries.Local_File_Processing_Lib import read_paths_from_file, process_local_file
26
- from App_Function_Libraries.SQLite_DB import add_media_to_database, is_valid_url
27
- from App_Function_Libraries.System_Checks_Lib import cuda_check, platform_check, check_ffmpeg
28
- from App_Function_Libraries.Utils import load_and_log_configs, sanitize_filename, create_download_directory, extract_text_from_segments
29
- from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
30
- #
31
- # 3rd-Party Module Imports
32
- import requests
33
- # OpenAI Tokenizer support
34
- #
35
- # Other Tokenizers
36
- #
37
- #######################
38
- # Logging Setup
39
- #
40
# Root logger configuration applied at import time; the CLI entry point may
# adjust the level later from its own arguments.
log_level = "DEBUG"
logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
# Opt out of Gradio's usage analytics.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
#
#############
# Global variables setup
#
# Default summarization prompt, used when no custom prompt is supplied.
custom_prompt_input = ("Above is the transcript of a video. Please read through the transcript carefully. Identify the "
                       "main topics that are discussed over the course of the transcript. Then, summarize the key points about each main "
                       "topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, "
                       "but should be much shorter than the full transcript. Please output your bullet point summary inside <bulletpoints> "
                       "tags.")
#
# Global variables
#
# Whisper model names offered to the user. Each name appears exactly once
# (the list previously contained "medium" twice).
whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
                  "distil-large-v2", "distil-medium.en", "distil-small.en"]
server_mode = False
share_public = False
57
- #
58
- #
59
- #######################
60
-
61
- #######################
62
- # Function Sections
63
- #
64
- abc_xyz = """
65
- Database Setup
66
- Config Loading
67
- System Checks
68
- DataBase Functions
69
- Processing Paths and local file handling
70
- Video Download/Handling
71
- Audio Transcription
72
- Diarization
73
- Chunking-related Techniques & Functions
74
- Tokenization-related Techniques & Functions
75
- Summarizers
76
- Gradio UI
77
- Main
78
- """
79
- #
80
- #
81
- #######################
82
- #######################
83
- #
84
- # TL/DW: Too Long Didn't Watch
85
- #
86
- # Project originally created by https://github.com/the-crypt-keeper
87
- # Modifications made by https://github.com/rmusser01
88
- # All credit to the original authors, I've just glued shit together.
89
- #
90
- #
91
- # Usage:
92
- #
93
- # Download Audio only from URL -> Transcribe audio:
94
- # python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
95
- #
96
- # Download Audio+Video from URL -> Transcribe audio from Video:
97
- # python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s
98
- #
99
- # Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
100
- # python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
101
- #
102
- # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
103
- # python summarize.py ./local/file_on_your/system --api_name <API_name>`
104
- #
105
- # Run it as a WebApp**
106
- # python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
107
- # Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
108
- #
109
- #######################
110
-
111
-
112
- #######################
113
- # Random issues I've encountered and how I solved them:
114
- # 1. Something about cuda nn library missing, even though cuda is installed...
115
- # https://github.com/tensorflow/tensorflow/issues/54784 - Basically, installing zlib made it go away. idk.
116
- # Or https://github.com/SYSTRAN/faster-whisper/issues/85
117
- #
118
- # 2. ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'C:\\Python312\\Scripts\\dateparser-download.exe' -> 'C:\\Python312\\Scripts\\dateparser-download.exe.deleteme'
119
- # Resolved through adding --user to the pip install command
120
- #
121
- # 3. Windows: Could not locate cudnn_ops_infer64_8.dll. Please make sure it is in your library path!
122
- #
123
- # 4.
124
- #
125
- # 5.
126
- #
127
- #
128
- #
129
- #######################
130
-
131
-
132
- #######################
133
- # DB Setup
134
-
135
- # Handled by SQLite_DB.py
136
-
137
- #######################
138
-
139
-
140
- #######################
141
- # Config loading
142
- #
143
- # 1.
144
- # 2.
145
- #
146
- #
147
- #######################
148
-
149
-
150
- #######################
151
- # System Startup Notice
152
- #
153
-
154
# FIXME - workaround: tell the Intel OpenMP runtime to tolerate more than one
# copy of itself being loaded into the process instead of aborting.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Whisper model names offered to the user. Each name appears exactly once
# (the list previously contained "medium" twice).
whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
                  "distil-large-v2", "distil-medium.en", "distil-small.en"]
# Language code -> display name.
source_languages = {
    "en": "English",
    "zh": "Chinese",
    "de": "German",
    "es": "Spanish",
    "ru": "Russian",
    "ko": "Korean",
    "fr": "French",
}
# Language codes only, in the same order as source_languages.
source_language_list = list(source_languages)
169
-
170
-
171
def print_hello():
    """Print the ASCII-art startup banner, then pause for one second."""
    banner = r"""_____ _ ________ _ _
|_ _|| | / /| _ \| | | | _
| | | | / / | | | || | | |(_)
| | | | / / | | | || |/\| |
| | | |____ / / | |/ / \ /\ / _
\_/ \_____//_/ |___/ \/ \/ (_)


_ _
| | | |
| |_ ___ ___ | | ___ _ __ __ _
| __| / _ \ / _ \ | | / _ \ | '_ \ / _` |
| |_ | (_) || (_) | | || (_) || | | || (_| | _
\__| \___/ \___/ |_| \___/ |_| |_| \__, |( )
__/ ||/
|___/
_ _ _ _ _ _ _
| |(_) | | ( )| | | | | |
__| | _ __| | _ __ |/ | |_ __ __ __ _ | |_ ___ | |__
/ _` || | / _` || '_ \ | __| \ \ /\ / / / _` || __| / __|| '_ \
| (_| || || (_| || | | | | |_ \ V V / | (_| || |_ | (__ | | | |
\__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
"""
    print(banner)
    # Short pause after the banner is printed.
    time.sleep(1)
197
-
198
-
199
- #
200
- #
201
- #######################
202
-
203
-
204
- #######################
205
- # System Check Functions
206
- #
207
- # 1. platform_check()
208
- # 2. cuda_check()
209
- # 3. decide_cpugpu()
210
- # 4. check_ffmpeg()
211
- # 5. download_ffmpeg()
212
- #
213
- #######################
214
-
215
-
216
- #######################
217
- # DB Functions
218
- #
219
- # create_tables()
220
- # add_keyword()
221
- # delete_keyword()
222
- # add_keyword()
223
- # add_media_with_keywords()
224
- # search_db()
225
- # format_results()
226
- # search_and_display()
227
- # export_to_csv()
228
- # is_valid_url()
229
- # is_valid_date()
230
- #
231
- ########################################################################################################################
232
-
233
-
234
- ########################################################################################################################
235
- # Processing Paths and local file handling
236
- #
237
- # Function List
238
- # 1. read_paths_from_file(file_path)
239
- # 2. process_path(path)
240
- # 3. process_local_file(file_path)
241
- # 4. read_paths_from_file(file_path: str) -> List[str]
242
- #
243
- #
244
- ########################################################################################################################
245
-
246
-
247
- #######################################################################################################################
248
- # Online Article Extraction / Handling
249
- #
250
- # Function List
251
- # 1. get_page_title(url)
252
- # 2. get_article_text(url)
253
- # 3. get_article_title(article_url_arg)
254
- #
255
- #
256
- #######################################################################################################################
257
-
258
-
259
- #######################################################################################################################
260
- # Video Download/Handling
261
- # Video-DL-Ingestion-Lib
262
- #
263
- # Function List
264
- # 1. get_video_info(url)
265
- # 2. create_download_directory(title)
266
- # 3. sanitize_filename(title)
267
- # 4. normalize_title(title)
268
- # 5. get_youtube(video_url)
269
- # 6. get_playlist_videos(playlist_url)
270
- # 7. download_video(video_url, download_path, info_dict, download_video_flag)
271
- # 8. save_to_file(video_urls, filename)
272
- # 9. save_summary_to_file(summary, file_path)
273
- # 10. process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, download_video, download_audio, rolling_summarization, detail_level, question_box, keywords, ) # FIXME - UPDATE
274
- #
275
- #
276
- #######################################################################################################################
277
-
278
-
279
- #######################################################################################################################
280
- # Audio Transcription
281
- #
282
- # Function List
283
- # 1. convert_to_wav(video_file_path, offset=0, overwrite=False)
284
- # 2. speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False)
285
- #
286
- #
287
- #######################################################################################################################
288
-
289
-
290
- #######################################################################################################################
291
- # Diarization
292
- #
293
- # Function List 1. speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding",
294
- # embedding_size=512, num_speakers=0)
295
- #
296
- #
297
- #######################################################################################################################
298
-
299
-
300
- #######################################################################################################################
301
- # Chunking-related Techniques & Functions
302
- #
303
- #
304
- # FIXME
305
- #
306
- #
307
- #######################################################################################################################
308
-
309
-
310
- #######################################################################################################################
311
- # Tokenization-related Functions
312
- #
313
- #
314
-
315
- # FIXME
316
-
317
- #
318
- #
319
- #######################################################################################################################
320
-
321
-
322
- #######################################################################################################################
323
- # Website-related Techniques & Functions
324
- #
325
- #
326
-
327
- #
328
- #
329
- #######################################################################################################################
330
-
331
-
332
- #######################################################################################################################
333
- # Summarizers
334
- #
335
- # Function List
336
- # 1. extract_text_from_segments(segments: List[Dict]) -> str
337
- # 2. summarize_with_openai(api_key, file_path, custom_prompt_arg)
338
- # 3. summarize_with_anthropic(api_key, file_path, model, custom_prompt_arg, max_retries=3, retry_delay=5)
339
- # 4. summarize_with_cohere(api_key, file_path, model, custom_prompt_arg)
340
- # 5. summarize_with_groq(api_key, file_path, model, custom_prompt_arg)
341
- #
342
- #################################
343
- # Local Summarization
344
- #
345
- # Function List
346
- #
347
- # 1. summarize_with_local_llm(file_path, custom_prompt_arg)
348
- # 2. summarize_with_llama(api_url, file_path, token, custom_prompt)
349
- # 3. summarize_with_kobold(api_url, file_path, kobold_api_token, custom_prompt)
350
- # 4. summarize_with_oobabooga(api_url, file_path, ooba_api_token, custom_prompt)
351
- # 5. summarize_with_vllm(vllm_api_url, vllm_api_key_function_arg, llm_model, text, vllm_custom_prompt_function_arg)
352
- # 6. summarize_with_tabbyapi(tabby_api_key, tabby_api_IP, text, tabby_model, custom_prompt)
353
- # 7. save_summary_to_file(summary, file_path)
354
- #
355
- #######################################################################################################################
356
-
357
-
358
- #######################################################################################################################
359
- # Summarization with Detail
360
- #
361
-
362
- # FIXME - see 'Old_Chunking_Lib.py'
363
-
364
- #
365
- #
366
- #######################################################################################################################
367
-
368
-
369
- #######################################################################################################################
370
- # Gradio UI
371
- #
372
- #
373
- #
374
- #
375
- #
376
- #################################################################################################################
377
- #
378
- #######################################################################################################################
379
- # Local LLM Setup / Running
380
- #
381
- # Function List
382
- # 1. download_latest_llamafile(repo, asset_name_prefix, output_filename)
383
- # 2. download_file(url, dest_path, expected_checksum=None, max_retries=3, delay=5)
384
- # 3. verify_checksum(file_path, expected_checksum)
385
- # 4. cleanup_process()
386
- # 5. signal_handler(sig, frame)
387
- # 6. local_llm_function()
388
- # 7. launch_in_new_terminal_windows(executable, args)
389
- # 8. launch_in_new_terminal_linux(executable, args)
390
- # 9. launch_in_new_terminal_mac(executable, args)
391
- #
392
- #
393
- #######################################################################################################################
394
-
395
-
396
- #######################################################################################################################
397
- # Helper Functions for Main() & process_url()
398
- #
399
- #
400
- #
401
- #######################################################################################################################
402
-
403
-
404
- ######################################################################################################################
405
- # Main()
406
- #
407
-
408
def _transcribe_media(media_path, offset, whisper_model, vad_filter, diarize):
    """Run perform_transcription on media_path, passing diarize=True only when requested."""
    if diarize:
        return perform_transcription(media_path, offset, whisper_model, vad_filter, diarize=True)
    return perform_transcription(media_path, offset, whisper_model, vad_filter)


def _summarize_transcript(transcription, api_name, api_key, prompt, rolling_summarization):
    """Return a summary of transcription produced via api_name, or None when none is produced."""
    if rolling_summarization:
        # FIXME - rolling summarization is not implemented yet. Return None
        # explicitly so callers never see a stale or unbound summary.
        logging.warning("Rolling summarization is not implemented yet; no summary will be generated")
        return None
    if api_name:
        return perform_summarization(api_name, transcription, prompt, api_key)
    return None


def _write_summary_file(download_path, media_ref, summary_text):
    """Write summary_text to '<base name of media_ref>_summary.txt' inside download_path."""
    # Name the file after the media's base name. The previous code interpolated
    # the entire transcription dict into the file name.
    stem = os.path.splitext(os.path.basename(str(media_ref)))[0] or "media"
    summary_file_path = os.path.join(download_path, f"{stem}_summary.txt")
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        file.write(summary_text)
    return summary_file_path


def _process_video_url(url, *, download_video_flag, offset, whisper_model, vad_filter, diarize,
                       api_name, api_key, prompt, rolling_summarization, keywords):
    """Download, transcribe, optionally summarize and store one URL.

    Returns (info, audio, transcription, summary_text), or None when the
    download fails.
    """
    info, title = extract_video_info(url)
    download_path = create_download_directory(title)
    video_path = download_video(url, download_path, info, download_video_flag)
    if not video_path:
        logging.error(f"Failed to download video: {url}")
        return None

    audio, segments = _transcribe_media(video_path, offset, whisper_model, vad_filter, diarize)
    transcription = {'audio_file': audio, 'transcription': segments}

    summary_text = _summarize_transcript(transcription, api_name, api_key, prompt, rolling_summarization)
    if summary_text:
        # NOTE(review): assumes `audio` is a filesystem path - confirm against perform_transcription.
        _write_summary_file(download_path, audio, summary_text)

    # NOTE(review): the media is stored whether or not a summary exists; the
    # original nesting could not be recovered with certainty - confirm.
    add_media_to_database(url, info, segments, summary_text, keywords, prompt, whisper_model)
    return info, audio, transcription, summary_text


def _summarize_chunk(method, api_key, chunk_text, custom_prompt):
    """Summarize one text chunk with the named method; return None for an unknown method."""
    if method == 'local-llm':
        return summarize_with_local_llm(chunk_text, custom_prompt)
    # NOTE(review): the argument order is kept exactly as the original calls had
    # it - confirm it matches each summarizer's current signature.
    remote_summarizers = {
        'openai': summarize_with_openai,
        'anthropic': summarize_with_anthropic,
        'cohere': summarize_with_cohere,
        'groq': summarize_with_groq,
        # FIXME - Add more summarization methods as needed
    }
    summarizer = remote_summarizers.get(method)
    if summarizer is None:
        return None
    return summarizer(api_key, chunk_text, custom_prompt)


def _chunk_text_file(path, *, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks, api_key,
                     custom_prompt):
    """Chunk a .txt file and write '<path>_chunks.json' plus, if requested, '<path>_summaries.json'."""
    chunks = semantic_chunk_long_file(path, max_chunk_size, chunk_overlap)
    if not chunks:
        logging.error(f"Failed to chunk file {path}")
        return

    chunks_data = {
        "file_path": path,
        "chunk_unit": chunk_unit,
        "max_chunk_size": max_chunk_size,
        "chunk_overlap": chunk_overlap,
        "chunks": [{"chunk_id": chunk_id, "text": chunk_text}
                   for chunk_id, chunk_text in enumerate(chunks, start=1)],
    }
    summaries_data = {
        "file_path": path,
        "summarization_method": summarize_chunks,
        "summaries": [],
    }

    if summarize_chunks:
        for chunk_id, chunk_text in enumerate(chunks, start=1):
            chunk_summary = _summarize_chunk(summarize_chunks, api_key, chunk_text, custom_prompt)
            if chunk_summary:
                summaries_data["summaries"].append({"chunk_id": chunk_id, "summary": chunk_summary})
            else:
                logging.warning(f"Failed to generate summary for chunk {chunk_id}")

    # Save chunks to a single JSON file
    chunks_file_path = f"{path}_chunks.json"
    with open(chunks_file_path, 'w', encoding='utf-8') as f:
        json.dump(chunks_data, f, ensure_ascii=False, indent=2)
    logging.info(f"All chunks saved to {chunks_file_path}")

    # Save summaries to a single JSON file (if summarization was performed)
    if summarize_chunks:
        summaries_file_path = f"{path}_summaries.json"
        with open(summaries_file_path, 'w', encoding='utf-8') as f:
            json.dump(summaries_data, f, ensure_ascii=False, indent=2)
        logging.info(f"All summaries saved to {summaries_file_path}")

    logging.info(f"File {path} chunked into {len(chunks)} parts using {chunk_unit} as the unit.")


def _process_local_media(path, media_path, download_path, info, *, offset, whisper_model, vad_filter,
                         diarize, api_name, api_key, prompt, rolling_summarization, keywords):
    """Ingest or transcribe one local file.

    Returns (audio, transcription, summary_text) for audio/video input, where
    `audio` is None for audio-only files. Returns None when the file was a
    text file or has an unsupported extension.
    """
    lowered = media_path.lower()

    if lowered.endswith(('.txt', '.md')):
        if lowered.endswith('.txt'):
            # Handle text file ingestion. This helper lives at module level, so
            # the name resolves to the imported function rather than to main()'s
            # boolean `ingest_text_file` parameter, which shadowed it before.
            logging.info(ingest_text_file(media_path))
        else:
            logging.warning(f"Markdown ingestion is not implemented, skipping: {media_path}")
        # Text files produce no segments, so there is nothing to transcribe or summarize.
        return None

    audio = None
    if lowered.endswith(('.mp4', '.avi', '.mov')):
        audio, segments = _transcribe_media(media_path, offset, whisper_model, vad_filter, diarize)
    elif lowered.endswith(('.wav', '.mp3', '.m4a')):
        if diarize:
            segments = speech_to_text(media_path, whisper_model=whisper_model, vad_filter=vad_filter, diarize=True)
        else:
            segments = speech_to_text(media_path, whisper_model=whisper_model, vad_filter=vad_filter)
    else:
        logging.error(f"Unsupported media file format: {media_path}")
        return None

    transcription = {'media_path': path, 'audio_file': media_path, 'transcription': segments}

    summary_text = _summarize_transcript(transcription, api_name, api_key, prompt, rolling_summarization)
    if summary_text:
        _write_summary_file(download_path, media_path, summary_text)

    add_media_to_database(path, info, segments, summary_text, keywords, prompt, whisper_model)
    return audio, transcription, summary_text


def main(input_path, api_name=None, api_key=None,
         num_speakers=2,
         whisper_model="small.en",
         offset=0,
         vad_filter=False,
         download_video_flag=False,
         custom_prompt=None,
         overwrite=False,
         rolling_summarization=False,
         detail=0.01,
         keywords=None,
         llm_model=None,
         time_based=False,
         set_chunk_txt_by_words=False,
         set_max_txt_chunk_words=0,
         set_chunk_txt_by_sentences=False,
         set_max_txt_chunk_sentences=0,
         set_chunk_txt_by_paragraphs=False,
         set_max_txt_chunk_paragraphs=0,
         set_chunk_txt_by_tokens=False,
         set_max_txt_chunk_tokens=0,
         ingest_text_file=False,
         chunk=False,
         max_chunk_size=2000,
         chunk_overlap=100,
         chunk_unit='tokens',
         summarize_chunks=None,
         diarize=False
         ):
    """Process a URL, a local media/text file, or a file listing several of them.

    Each resolved path is handled in one of three ways:
      * paths starting with 'http' are downloaded, transcribed, optionally
        summarized and added to the database;
      * '.txt' paths are chunked (and optionally summarized per chunk) when
        `chunk` is true;
      * anything else goes through process_local_file(), which yields either a
        list of URLs or a single local media/text file.

    A failure on one path is logged and processing continues with the next.

    Args:
        input_path: URL or filesystem path. When it is an existing file it is
            expanded with read_paths_from_file().
        api_name: Summarization backend name; no summary is made when falsy.
        api_key: API key forwarded to the summarization backend.
        whisper_model, offset, vad_filter, diarize: Transcription options.
        download_video_flag: Forwarded to download_video().
        custom_prompt: Summarization prompt; the module-level
            `custom_prompt_input` is used when this is falsy.
        rolling_summarization: Not implemented; when true no summary is made.
        keywords: Keywords stored with the media.
        chunk, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks:
            Options for the '.txt' chunking path.
        num_speakers, overwrite, detail, llm_model, time_based, set_*,
        ingest_text_file: Accepted for caller compatibility; not used here.

    Returns:
        [] when `input_path` is empty; otherwise the transcription dict of the
        last successfully transcribed item, or None if nothing was transcribed.
    """
    global detail_level_number, summary, audio_file, transcription_text, info_dict

    print(f"Keywords: {keywords}")

    if not input_path:
        return []

    # Always leave the function with a defined return value, even when every
    # path fails or only text files were processed.
    transcription_text = None
    # Honour the caller's prompt and fall back to the module default otherwise.
    prompt = custom_prompt or custom_prompt_input

    media_options = dict(
        offset=offset,
        whisper_model=whisper_model,
        vad_filter=vad_filter,
        diarize=diarize,
        api_name=api_name,
        api_key=api_key,
        prompt=prompt,
        rolling_summarization=rolling_summarization,
        keywords=keywords,
    )

    paths = [input_path] if not os.path.isfile(input_path) else read_paths_from_file(input_path)

    for path in paths:
        try:
            if path.startswith('http'):
                outcome = _process_video_url(path, download_video_flag=download_video_flag, **media_options)
                if outcome is not None:
                    info_dict, audio_file, transcription_text, summary = outcome

            # FIXME - make sure this doesn't break ingesting multiple videos vs multiple text files
            # FIXME - Need to update so that chunking is fully handled.
            elif chunk and path.lower().endswith('.txt'):
                _chunk_text_file(path,
                                 max_chunk_size=max_chunk_size,
                                 chunk_overlap=chunk_overlap,
                                 chunk_unit=chunk_unit,
                                 summarize_chunks=summarize_chunks,
                                 api_key=api_key,
                                 custom_prompt=custom_prompt)

            # Handle downloading of URLs from a text file or processing local video/audio files
            else:
                download_path, info_dict, urls_or_media_file = process_local_file(path)
                if isinstance(urls_or_media_file, list):
                    # Text file containing URLs: handle each URL exactly once
                    # and skip entries that are not http(s) URLs.
                    for url in urls_or_media_file:
                        if not url.startswith(('http://', 'https://')):
                            continue
                        try:
                            outcome = _process_video_url(url, download_video_flag=download_video_flag,
                                                         **media_options)
                        except Exception as e:
                            # One bad URL must not abort the rest of the list.
                            logging.error(f"Error processing {url}: {str(e)}", exc_info=True)
                            continue
                        if outcome is not None:
                            info_dict, audio_file, transcription_text, summary = outcome
                else:
                    # Video or audio or txt file
                    outcome = _process_local_media(path, urls_or_media_file, download_path, info_dict,
                                                   **media_options)
                    if outcome is not None:
                        local_audio, transcription_text, summary = outcome
                        if local_audio is not None:
                            audio_file = local_audio

        except Exception as e:
            logging.error(f"Error processing {path}: {str(e)}", exc_info=True)
            continue

    return transcription_text
658
-
659
-
660
def signal_handler(sig, frame):
    """Handle a termination signal: log it, run cleanup_process(), exit with status 0.

    `frame` is accepted because the signal module passes it; it is not used.
    """
    logging.info('Signal handler called with signal: %s', sig)
    cleanup_process()
    # Same effect as sys.exit(0), which raises SystemExit(0).
    raise SystemExit(0)
664
-
665
-
666
- ############################## MAIN ##############################
667
- #
668
- #
669
-
670
- if __name__ == "__main__":
671
- # Register signal handlers
672
- signal.signal(signal.SIGINT, signal_handler)
673
- signal.signal(signal.SIGTERM, signal_handler)
674
-
675
- # Logging setup
676
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
677
-
678
- # Load Config
679
- loaded_config_data = load_and_log_configs()
680
-
681
- if loaded_config_data:
682
- logging.info("Main: Configuration loaded successfully")
683
- # You can access the configuration data like this:
684
- # print(f"OpenAI API Key: {config_data['api_keys']['openai']}")
685
- # print(f"Anthropic Model: {config_data['models']['anthropic']}")
686
- # print(f"Kobold API IP: {config_data['local_apis']['kobold']['ip']}")
687
- # print(f"Output Path: {config_data['output_path']}")
688
- # print(f"Processing Choice: {config_data['processing_choice']}")
689
- else:
690
- print("Failed to load configuration")
691
-
692
- # Print ascii_art
693
- print_hello()
694
-
695
- transcription_text = None
696
-
697
- parser = argparse.ArgumentParser(
698
- description='Transcribe and summarize videos.',
699
- epilog='''
700
- Sample commands:
701
- 1. Simple Sample command structure:
702
- summarize.py <path_to_video> -api openai -k tag_one tag_two tag_three
703
-
704
- 2. Rolling Summary Sample command structure:
705
- summarize.py <path_to_video> -api openai -prompt "custom_prompt_goes_here-is-appended-after-transcription" -roll -detail 0.01 -k tag_one tag_two tag_three
706
-
707
- 3. FULL Sample command structure:
708
- summarize.py <path_to_video> -api openai -ns 2 -wm small.en -off 0 -vad -log INFO -prompt "custom_prompt" -overwrite -roll -detail 0.01 -k tag_one tag_two tag_three
709
-
710
- 4. Sample command structure for UI:
711
- summarize.py -gui -log DEBUG
712
- ''',
713
- formatter_class=argparse.RawTextHelpFormatter
714
- )
715
- parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
716
- parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
717
- parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
718
- parser.add_argument('-key', '--api_key', type=str, help='API key for summarization (optional)')
719
- parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
720
- parser.add_argument('-wm', '--whisper_model', type=str, default='small',
721
- help='Whisper model (default: small)| Options: tiny.en, tiny, base.en, base, small.en, small, medium.en, '
722
- 'medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, '
723
- 'distil-small.en')
724
- parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
725
- parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
726
- parser.add_argument('-log', '--log_level', type=str, default='INFO',
727
- choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
728
- parser.add_argument('-gui', '--user_interface', action='store_true', default=True, help="Launch the Gradio user interface")
729
- parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
730
- parser.add_argument('-prompt', '--custom_prompt', type=str,
731
- help='Pass in a custom prompt to be used in place of the existing one.\n (Probably should just '
732
- 'modify the script itself...)')
733
- parser.add_argument('-overwrite', '--overwrite', action='store_true', help='Overwrite existing files')
734
- parser.add_argument('-roll', '--rolling_summarization', action='store_true', help='Enable rolling summarization')
735
- parser.add_argument('-detail', '--detail_level', type=float, help='Mandatory if rolling summarization is enabled, '
736
- 'defines the chunk size.\n Default is 0.01(lots '
737
- 'of chunks) -> 1.00 (few chunks)\n Currently '
738
- 'only OpenAI works. ',
739
- default=0.01, )
740
- parser.add_argument('-model', '--llm_model', type=str, default='',
741
- help='Model to use for LLM summarization (only used for vLLM/TabbyAPI)')
742
- parser.add_argument('-k', '--keywords', nargs='+', default=['cli_ingest_no_tag'],
743
- help='Keywords for tagging the media, can use multiple separated by spaces (default: cli_ingest_no_tag)')
744
- parser.add_argument('--log_file', type=str, help='Where to save logfile (non-default)')
745
- parser.add_argument('--local_llm', action='store_true',
746
- help="Use a local LLM from the script(Downloads llamafile from github and 'mistral-7b-instruct-v0.2.Q8' - 8GB model from Huggingface)")
747
- parser.add_argument('--server_mode', action='store_true',
748
- help='Run in server mode (This exposes the GUI/Server to the network)')
749
- parser.add_argument('--share_public', type=int, default=7860,
750
- help="This will use Gradio's built-in ngrok tunneling to share the server publicly on the internet. Specify the port to use (default: 7860)")
751
- parser.add_argument('--port', type=int, default=7860, help='Port to run the server on')
752
- parser.add_argument('--ingest_text_file', action='store_true',
753
- help='Ingest .txt files as content instead of treating them as URL lists')
754
- parser.add_argument('--text_title', type=str, help='Title for the text file being ingested')
755
- parser.add_argument('--text_author', type=str, help='Author of the text file being ingested')
756
- parser.add_argument('--diarize', action='store_true', help='Enable speaker diarization')
757
- # parser.add_argument('--offload', type=int, default=20, help='Numbers of layers to offload to GPU for Llamafile usage')
758
- # parser.add_argument('-o', '--output_path', type=str, help='Path to save the output file')
759
-
760
- args = parser.parse_args()
761
-
762
- # Set Chunking values/variables
763
- set_chunk_txt_by_words = False
764
- set_max_txt_chunk_words = 0
765
- set_chunk_txt_by_sentences = False
766
- set_max_txt_chunk_sentences = 0
767
- set_chunk_txt_by_paragraphs = False
768
- set_max_txt_chunk_paragraphs = 0
769
- set_chunk_txt_by_tokens = False
770
- set_max_txt_chunk_tokens = 0
771
-
772
- if args.share_public:
773
- share_public = args.share_public
774
- else:
775
- share_public = None
776
- if args.server_mode:
777
-
778
- server_mode = args.server_mode
779
- else:
780
- server_mode = None
781
- if args.server_mode is True:
782
- server_mode = True
783
- if args.port:
784
- server_port = args.port
785
- else:
786
- server_port = None
787
-
788
- ########## Logging setup
789
- logger = logging.getLogger()
790
- logger.setLevel(getattr(logging, args.log_level))
791
-
792
- # Create console handler
793
- console_handler = logging.StreamHandler()
794
- console_handler.setLevel(getattr(logging, args.log_level))
795
- console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
796
- console_handler.setFormatter(console_formatter)
797
-
798
- if args.log_file:
799
- # Create file handler
800
- file_handler = logging.FileHandler(args.log_file)
801
- file_handler.setLevel(getattr(logging, args.log_level))
802
- file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
803
- file_handler.setFormatter(file_formatter)
804
- logger.addHandler(file_handler)
805
- logger.info(f"Log file created at: {args.log_file}")
806
-
807
- ########## Custom Prompt setup
808
- custom_prompt_input = args.custom_prompt
809
-
810
- if not args.custom_prompt:
811
- logging.debug("No custom prompt defined, will use default")
812
- args.custom_prompt_input = (
813
- "\n\nabove is the transcript of a video. "
814
- "Please read through the transcript carefully. Identify the main topics that are "
815
- "discussed over the course of the transcript. Then, summarize the key points about each "
816
- "main topic in a concise bullet point. The bullet points should cover the key "
817
- "information conveyed about each topic in the video, but should be much shorter than "
818
- "the full transcript. Please output your bullet point summary inside <bulletpoints> "
819
- "tags."
820
- )
821
- print("No custom prompt defined, will use default")
822
-
823
- custom_prompt_input = args.custom_prompt
824
- else:
825
- logging.debug(f"Custom prompt defined, will use \n\nf{custom_prompt_input} \n\nas the prompt")
826
- print(f"Custom Prompt has been defined. Custom prompt: \n\n {args.custom_prompt}")
827
-
828
- # Check if the user wants to use the local LLM from the script
829
- local_llm = args.local_llm
830
- logging.info(f'Local LLM flag: {local_llm}')
831
-
832
- # Check if the user wants to ingest a text file (singular or multiple from a folder)
833
- if args.input_path is not None:
834
- if os.path.isdir(args.input_path) and args.ingest_text_file:
835
- results = ingest_folder(args.input_path, keywords=args.keywords)
836
- for result in results:
837
- print(result)
838
- elif args.input_path.lower().endswith('.txt') and args.ingest_text_file:
839
- result = ingest_text_file(args.input_path, title=args.text_title, author=args.text_author,
840
- keywords=args.keywords)
841
- print(result)
842
- sys.exit(0)
843
-
844
- # Launch the GUI
845
- # This is huggingface so:
846
- if args.user_interface:
847
- if local_llm:
848
- local_llm_function()
849
- time.sleep(2)
850
- webbrowser.open_new_tab('http://127.0.0.1:7860')
851
- launch_ui()
852
- elif not args.input_path:
853
- parser.print_help()
854
- sys.exit(1)
855
-
856
- else:
857
- logging.info('Starting the transcription and summarization process.')
858
- logging.info(f'Input path: {args.input_path}')
859
- logging.info(f'API Name: {args.api_name}')
860
- logging.info(f'Number of speakers: {args.num_speakers}')
861
- logging.info(f'Whisper model: {args.whisper_model}')
862
- logging.info(f'Offset: {args.offset}')
863
- logging.info(f'VAD filter: {args.vad_filter}')
864
- logging.info(f'Log Level: {args.log_level}')
865
- logging.info(f'Demo Mode: {args.demo_mode}')
866
- logging.info(f'Custom Prompt: {args.custom_prompt}')
867
- logging.info(f'Overwrite: {args.overwrite}')
868
- logging.info(f'Rolling Summarization: {args.rolling_summarization}')
869
- logging.info(f'User Interface: {args.user_interface}')
870
- logging.info(f'Video Download: {args.video}')
871
- # logging.info(f'Save File location: {args.output_path}')
872
- # logging.info(f'Log File location: {args.log_file}')
873
-
874
- global api_name
875
- api_name = args.api_name
876
-
877
- summary = None # Initialize to ensure it's always defined
878
- if args.detail_level == None:
879
- args.detail_level = 0.01
880
-
881
- # FIXME
882
- # if args.api_name and args.rolling_summarization and any(
883
- # key.startswith(args.api_name) and value is not None for key, value in api_keys.items()):
884
- # logging.info(f'MAIN: API used: {args.api_name}')
885
- # logging.info('MAIN: Rolling Summarization will be performed.')
886
-
887
- elif args.api_name:
888
- logging.info(f'MAIN: API used: {args.api_name}')
889
- logging.info('MAIN: Summarization (not rolling) will be performed.')
890
-
891
- else:
892
- logging.info('No API specified. Summarization will not be performed.')
893
-
894
- logging.debug("Platform check being performed...")
895
- platform_check()
896
- logging.debug("CUDA check being performed...")
897
- cuda_check()
898
- processing_choice = "cpu"
899
- logging.debug("ffmpeg check being performed...")
900
- check_ffmpeg()
901
- # download_ffmpeg()
902
-
903
- llm_model = args.llm_model or None
904
- # FIXME - dirty hack
905
- args.time_based = False
906
-
907
- try:
908
- results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
909
- num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
910
- vad_filter=args.vad_filter, download_video_flag=args.video, custom_prompt=args.custom_prompt_input,
911
- overwrite=args.overwrite, rolling_summarization=args.rolling_summarization,
912
- detail=args.detail_level, keywords=args.keywords, llm_model=args.llm_model,
913
- time_based=args.time_based, set_chunk_txt_by_words=set_chunk_txt_by_words,
914
- set_max_txt_chunk_words=set_max_txt_chunk_words,
915
- set_chunk_txt_by_sentences=set_chunk_txt_by_sentences,
916
- set_max_txt_chunk_sentences=set_max_txt_chunk_sentences,
917
- set_chunk_txt_by_paragraphs=set_chunk_txt_by_paragraphs,
918
- set_max_txt_chunk_paragraphs=set_max_txt_chunk_paragraphs,
919
- set_chunk_txt_by_tokens=set_chunk_txt_by_tokens,
920
- set_max_txt_chunk_tokens=set_max_txt_chunk_tokens)
921
-
922
- logging.info('Transcription process completed.')
923
- atexit.register(cleanup_process)
924
- except Exception as e:
925
- logging.error('An error occurred during the transcription process.')
926
- logging.error(str(e))
927
- sys.exit(1)
928
-
929
- finally:
930
- cleanup_process()
 
 
1
+ #!/usr/bin/env python3
2
+ # Std Lib Imports
3
+ import argparse
4
+ import atexit
5
+ import json
6
+ import logging
7
+ import os
8
+ import signal
9
+ import sys
10
+ import time
11
+ import webbrowser
12
+ #
13
+ # Local Library Imports
14
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'App_Function_Libraries')))
15
+ from App_Function_Libraries.Book_Ingestion_Lib import ingest_folder, ingest_text_file
16
+ from App_Function_Libraries.Chunk_Lib import semantic_chunk_long_file#, rolling_summarize_function,
17
+ from App_Function_Libraries.Gradio_Related import launch_ui
18
+ from App_Function_Libraries.Local_LLM_Inference_Engine_Lib import cleanup_process, local_llm_function
19
+ from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
20
+ summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
21
+ from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai, summarize_with_anthropic, \
22
+ summarize_with_cohere, summarize_with_groq, summarize_with_openrouter, summarize_with_deepseek, \
23
+ summarize_with_huggingface, perform_transcription, perform_summarization
24
+ from App_Function_Libraries.Audio_Transcription_Lib import convert_to_wav, speech_to_text
25
+ from App_Function_Libraries.Local_File_Processing_Lib import read_paths_from_file, process_local_file
26
+ from App_Function_Libraries.SQLite_DB import add_media_to_database, is_valid_url
27
+ from App_Function_Libraries.System_Checks_Lib import cuda_check, platform_check, check_ffmpeg
28
+ from App_Function_Libraries.Utils import load_and_log_configs, sanitize_filename, create_download_directory, extract_text_from_segments
29
+ from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
30
+ #
31
+ # 3rd-Party Module Imports
32
+ import requests
33
+ # OpenAI Tokenizer support
34
+ #
35
+ # Other Tokenizers
36
+ #
37
+ #######################
38
+ # Logging Setup
39
+ #
40
# Root logger configuration for the whole application.
log_level = "DEBUG"
logging.basicConfig(level=getattr(logging, log_level), format='%(asctime)s - %(levelname)s - %(message)s')
# Opt out of Gradio's usage analytics.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
#
#############
# Global variables setup
#
# Default summarization prompt. Previously only the first line of this
# parenthesised literal was commented out, which left dangling string
# literals and an unmatched ")" (a SyntaxError) and removed the global that
# main() reads.
custom_prompt_input = (
    "Above is the transcript of a video. Please read through the transcript carefully. Identify the "
    "main topics that are discussed over the course of the transcript. Then, summarize the key points about each main "
    "topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, "
    "but should be much shorter than the full transcript. Please output your bullet point summary inside <bulletpoints> "
    "tags."
)
#
# Global variables
# Whisper model names offered to the user (each name listed once).
whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
                  "distil-large-v2", "distil-medium.en", "distil-small.en"]
server_mode = False
share_public = False
57
+ #
58
+ #
59
+ #######################
60
+
61
+ #######################
62
+ # Function Sections
63
+ #
64
+ # Informal table of contents: the names of the comment-delimited sections of this file.
+ # NOTE(review): nothing in the visible code reads abc_xyz - confirm before removing it.
+ abc_xyz = """
65
+ Database Setup
66
+ Config Loading
67
+ System Checks
68
+ DataBase Functions
69
+ Processing Paths and local file handling
70
+ Video Download/Handling
71
+ Audio Transcription
72
+ Diarization
73
+ Chunking-related Techniques & Functions
74
+ Tokenization-related Techniques & Functions
75
+ Summarizers
76
+ Gradio UI
77
+ Main
78
+ """
79
+ #
80
+ #
81
+ #######################
82
+ #######################
83
+ #
84
+ # TL/DW: Too Long Didn't Watch
85
+ #
86
+ # Project originally created by https://github.com/the-crypt-keeper
87
+ # Modifications made by https://github.com/rmusser01
88
+ # All credit to the original authors, I've just glued shit together.
89
+ #
90
+ #
91
+ # Usage:
92
+ #
93
+ # Download Audio only from URL -> Transcribe audio:
94
+ # python summarize.py https://www.youtube.com/watch?v=4nd1CDZP21s
95
+ #
96
+ # Download Audio+Video from URL -> Transcribe audio from Video:
97
+ # python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s
98
+ #
99
+ # Download Audio only from URL -> Transcribe audio -> Summarize using (`anthropic`/`cohere`/`openai`/`llama` (llama.cpp)/`ooba` (oobabooga/text-gen-webui)/`kobold` (kobold.cpp)/`tabby` (Tabbyapi)) API:**
100
+ # python summarize.py -v https://www.youtube.com/watch?v=4nd1CDZP21s -api <your choice of API>` - Make sure to put your API key into `config.txt` under the appropriate API variable
101
+ #
102
+ # Download Audio+Video from a list of videos in a text file (can be file paths or URLs) and have them all summarized:**
103
+ # python summarize.py ./local/file_on_your/system --api_name <API_name>`
104
+ #
105
+ # Run it as a WebApp**
106
+ # python summarize.py -gui` - This requires you to either stuff your API keys into the `config.txt` file, or pass them into the app every time you want to use it.
107
+ # Can be helpful for setting up a shared instance, but not wanting people to perform inference on your server.
108
+ #
109
+ #######################
110
+
111
+
112
+ #######################
113
+ # Random issues I've encountered and how I solved them:
114
+ # 1. Something about cuda nn library missing, even though cuda is installed...
115
+ # https://github.com/tensorflow/tensorflow/issues/54784 - Basically, installing zlib made it go away. idk.
116
+ # Or https://github.com/SYSTRAN/faster-whisper/issues/85
117
+ #
118
+ # 2. ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'C:\\Python312\\Scripts\\dateparser-download.exe' -> 'C:\\Python312\\Scripts\\dateparser-download.exe.deleteme'
119
+ # Resolved through adding --user to the pip install command
120
+ #
121
+ # 3. Windows: Could not locate cudnn_ops_infer64_8.dll. Please make sure it is in your library path!
122
+ #
123
+ # 4.
124
+ #
125
+ # 5.
126
+ #
127
+ #
128
+ #
129
+ #######################
130
+
131
+
132
+ #######################
133
+ # DB Setup
134
+
135
+ # Handled by SQLite_DB.py
136
+
137
+ #######################
138
+
139
+
140
+ #######################
141
+ # Config loading
142
+ #
143
+ # 1.
144
+ # 2.
145
+ #
146
+ #
147
+ #######################
148
+
149
+
150
+ #######################
151
+ # System Startup Notice
152
+ #
153
+
154
+ # Dirty hack - sue me. - FIXME - fix this...
155
# NOTE(review): presumably set so that loading more than one OpenMP runtime
# does not abort the process - confirm this is still required.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Whisper model names offered to the user (each name listed once).
whisper_models = ["small", "medium", "small.en", "medium.en", "large", "large-v1", "large-v2", "large-v3",
                  "distil-large-v2", "distil-medium.en", "distil-small.en"]

# Language code -> human-readable language name.
source_languages = {
    "en": "English",
    "zh": "Chinese",
    "de": "German",
    "es": "Spanish",
    "ru": "Russian",
    "ko": "Korean",
    "fr": "French",
}
# Language codes only, in the insertion order of source_languages.
source_language_list = list(source_languages)
169
+
170
+
171
+ def print_hello():
171
+ """Print the project's ASCII-art banner to stdout, then sleep for one second."""
+ print(r"""_____ _ ________ _ _
172
+ |_ _|| | / /| _ \| | | | _
173
+ | | | | / / | | | || | | |(_)
174
+ | | | | / / | | | || |/\| |
175
+ | | | |____ / / | |/ / \ /\ / _
176
+ \_/ \_____//_/ |___/ \/ \/ (_)
177
+
178
+
179
+ _ _
180
+ | | | |
181
+ | |_ ___ ___ | | ___ _ __ __ _
182
+ | __| / _ \ / _ \ | | / _ \ | '_ \ / _` |
183
+ | |_ | (_) || (_) | | || (_) || | | || (_| | _
184
+ \__| \___/ \___/ |_| \___/ |_| |_| \__, |( )
185
+ __/ ||/
186
+ |___/
187
+ _ _ _ _ _ _ _
188
+ | |(_) | | ( )| | | | | |
189
+ __| | _ __| | _ __ |/ | |_ __ __ __ _ | |_ ___ | |__
190
+ / _` || | / _` || '_ \ | __| \ \ /\ / / / _` || __| / __|| '_ \
191
+ | (_| || || (_| || | | | | |_ \ V V / | (_| || |_ | (__ | | | |
192
+ \__,_||_| \__,_||_| |_| \__| \_/\_/ \__,_| \__| \___||_| |_|
193
+ """)
194
+ time.sleep(1)
195
+ return
197
+
198
+
199
+ #
200
+ #
201
+ #######################
202
+
203
+
204
+ #######################
205
+ # System Check Functions
206
+ #
207
+ # 1. platform_check()
208
+ # 2. cuda_check()
209
+ # 3. decide_cpugpu()
210
+ # 4. check_ffmpeg()
211
+ # 5. download_ffmpeg()
212
+ #
213
+ #######################
214
+
215
+
216
+ #######################
217
+ # DB Functions
218
+ #
219
+ # create_tables()
220
+ # add_keyword()
221
+ # delete_keyword()
222
+ # add_keyword()
223
+ # add_media_with_keywords()
224
+ # search_db()
225
+ # format_results()
226
+ # search_and_display()
227
+ # export_to_csv()
228
+ # is_valid_url()
229
+ # is_valid_date()
230
+ #
231
+ ########################################################################################################################
232
+
233
+
234
+ ########################################################################################################################
235
+ # Processing Paths and local file handling
236
+ #
237
+ # Function List
238
+ # 1. read_paths_from_file(file_path)
239
+ # 2. process_path(path)
240
+ # 3. process_local_file(file_path)
241
+ # 4. read_paths_from_file(file_path: str) -> List[str]
242
+ #
243
+ #
244
+ ########################################################################################################################
245
+
246
+
247
+ #######################################################################################################################
248
+ # Online Article Extraction / Handling
249
+ #
250
+ # Function List
251
+ # 1. get_page_title(url)
252
+ # 2. get_article_text(url)
253
+ # 3. get_article_title(article_url_arg)
254
+ #
255
+ #
256
+ #######################################################################################################################
257
+
258
+
259
+ #######################################################################################################################
260
+ # Video Download/Handling
261
+ # Video-DL-Ingestion-Lib
262
+ #
263
+ # Function List
264
+ # 1. get_video_info(url)
265
+ # 2. create_download_directory(title)
266
+ # 3. sanitize_filename(title)
267
+ # 4. normalize_title(title)
268
+ # 5. get_youtube(video_url)
269
+ # 6. get_playlist_videos(playlist_url)
270
+ # 7. download_video(video_url, download_path, info_dict, download_video_flag)
271
+ # 8. save_to_file(video_urls, filename)
272
+ # 9. save_summary_to_file(summary, file_path)
273
+ # 10. process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, download_video, download_audio, rolling_summarization, detail_level, question_box, keywords, ) # FIXME - UPDATE
274
+ #
275
+ #
276
+ #######################################################################################################################
277
+
278
+
279
+ #######################################################################################################################
280
+ # Audio Transcription
281
+ #
282
+ # Function List
283
+ # 1. convert_to_wav(video_file_path, offset=0, overwrite=False)
284
+ # 2. speech_to_text(audio_file_path, selected_source_lang='en', whisper_model='small.en', vad_filter=False)
285
+ #
286
+ #
287
+ #######################################################################################################################
288
+
289
+
290
+ #######################################################################################################################
291
+ # Diarization
292
+ #
293
+ # Function List 1. speaker_diarize(video_file_path, segments, embedding_model = "pyannote/embedding",
294
+ # embedding_size=512, num_speakers=0)
295
+ #
296
+ #
297
+ #######################################################################################################################
298
+
299
+
300
+ #######################################################################################################################
301
+ # Chunking-related Techniques & Functions
302
+ #
303
+ #
304
+ # FIXME
305
+ #
306
+ #
307
+ #######################################################################################################################
308
+
309
+
310
+ #######################################################################################################################
311
+ # Tokenization-related Functions
312
+ #
313
+ #
314
+
315
+ # FIXME
316
+
317
+ #
318
+ #
319
+ #######################################################################################################################
320
+
321
+
322
+ #######################################################################################################################
323
+ # Website-related Techniques & Functions
324
+ #
325
+ #
326
+
327
+ #
328
+ #
329
+ #######################################################################################################################
330
+
331
+
332
+ #######################################################################################################################
333
+ # Summarizers
334
+ #
335
+ # Function List
336
+ # 1. extract_text_from_segments(segments: List[Dict]) -> str
337
+ # 2. summarize_with_openai(api_key, file_path, custom_prompt_arg)
338
+ # 3. summarize_with_anthropic(api_key, file_path, model, custom_prompt_arg, max_retries=3, retry_delay=5)
339
+ # 4. summarize_with_cohere(api_key, file_path, model, custom_prompt_arg)
340
+ # 5. summarize_with_groq(api_key, file_path, model, custom_prompt_arg)
341
+ #
342
+ #################################
343
+ # Local Summarization
344
+ #
345
+ # Function List
346
+ #
347
+ # 1. summarize_with_local_llm(file_path, custom_prompt_arg)
348
+ # 2. summarize_with_llama(api_url, file_path, token, custom_prompt)
349
+ # 3. summarize_with_kobold(api_url, file_path, kobold_api_token, custom_prompt)
350
+ # 4. summarize_with_oobabooga(api_url, file_path, ooba_api_token, custom_prompt)
351
+ # 5. summarize_with_vllm(vllm_api_url, vllm_api_key_function_arg, llm_model, text, vllm_custom_prompt_function_arg)
352
+ # 6. summarize_with_tabbyapi(tabby_api_key, tabby_api_IP, text, tabby_model, custom_prompt)
353
+ # 7. save_summary_to_file(summary, file_path)
354
+ #
355
+ #######################################################################################################################
356
+
357
+
358
+ #######################################################################################################################
359
+ # Summarization with Detail
360
+ #
361
+
362
+ # FIXME - see 'Old_Chunking_Lib.py'
363
+
364
+ #
365
+ #
366
+ #######################################################################################################################
367
+
368
+
369
+ #######################################################################################################################
370
+ # Gradio UI
371
+ #
372
+ #
373
+ #
374
+ #
375
+ #
376
+ #################################################################################################################
377
+ #
378
+ #######################################################################################################################
379
+ # Local LLM Setup / Running
380
+ #
381
+ # Function List
382
+ # 1. download_latest_llamafile(repo, asset_name_prefix, output_filename)
383
+ # 2. download_file(url, dest_path, expected_checksum=None, max_retries=3, delay=5)
384
+ # 3. verify_checksum(file_path, expected_checksum)
385
+ # 4. cleanup_process()
386
+ # 5. signal_handler(sig, frame)
387
+ # 6. local_llm_function()
388
+ # 7. launch_in_new_terminal_windows(executable, args)
389
+ # 8. launch_in_new_terminal_linux(executable, args)
390
+ # 9. launch_in_new_terminal_mac(executable, args)
391
+ #
392
+ #
393
+ #######################################################################################################################
394
+
395
+
396
+ #######################################################################################################################
397
+ # Helper Functions for Main() & process_url()
398
+ #
399
+ #
400
+ #
401
+ #######################################################################################################################
402
+
403
+
404
+ ######################################################################################################################
405
+ # Main()
406
+ #
407
+
408
def _transcribe_video(video_path, offset, whisper_model, vad_filter, diarize):
    """Call perform_transcription, passing diarize=True only when diarization was requested."""
    extra_kwargs = {'diarize': True} if diarize else {}
    return perform_transcription(video_path, offset, whisper_model, vad_filter, **extra_kwargs)


def _summarize_transcription(transcription, api_name, api_key, prompt, rolling_summarization):
    """Return a summary for *transcription*, or None when no summary can be produced."""
    if rolling_summarization:
        # FIXME - rolling summarization is not implemented yet (see 'Old_Chunking_Lib.py').
        logging.warning("Rolling summarization is not implemented; no summary will be generated.")
        return None
    if api_name:
        return perform_summarization(api_name, transcription, prompt, api_key)
    return None


def _write_summary_file(download_path, media_name, summary):
    """Write *summary* to '<media base name>_summary.txt' inside *download_path* and return the path."""
    # Name the file after the media file; the previous code formatted the whole
    # transcription dict into the file name.
    stem = os.path.splitext(os.path.basename(str(media_name)))[0] or "media"
    summary_file_path = os.path.join(download_path, f"{stem}_summary.txt")
    with open(summary_file_path, 'w', encoding='utf-8') as file:
        file.write(summary)
    return summary_file_path


def _process_remote_media(url, *, download_video_flag, offset, whisper_model, vad_filter, diarize,
                          rolling_summarization, api_name, api_key, prompt, keywords):
    """Download, transcribe, optionally summarize and store one URL.

    Returns the transcription dict, or None when the download failed.
    """
    global summary, audio_file, info_dict

    info_dict, title = extract_video_info(url)
    download_path = create_download_directory(title)
    video_path = download_video(url, download_path, info_dict, download_video_flag)
    if not video_path:
        logging.error(f"Failed to download video: {url}")
        return None

    audio_file, segments = _transcribe_video(video_path, offset, whisper_model, vad_filter, diarize)
    transcription = {'audio_file': audio_file, 'transcription': segments}

    summary = _summarize_transcription(transcription, api_name, api_key, prompt, rolling_summarization)
    if summary:
        # NOTE(review): assumes audio_file is a filesystem path - confirm against perform_transcription.
        _write_summary_file(download_path, audio_file, summary)

    add_media_to_database(url, info_dict, segments, summary, keywords, prompt, whisper_model)
    return transcription


def _summarize_chunk(method, api_key, chunk_text, prompt):
    """Summarize one text chunk with the backend named by *method*; None for unknown backends."""
    if method == 'local-llm':
        return summarize_with_local_llm(chunk_text, prompt)
    # Built lazily so the summarizer names are only resolved when a chunk is summarized.
    # FIXME - Add more summarization methods as needed
    api_summarizers = {
        'openai': summarize_with_openai,
        'anthropic': summarize_with_anthropic,
        'cohere': summarize_with_cohere,
        'groq': summarize_with_groq,
    }
    summarizer = api_summarizers.get(method)
    if summarizer is None:
        return None
    return summarizer(api_key, chunk_text, prompt)


def _chunk_text_file(path, *, max_chunk_size, chunk_overlap, chunk_unit, summarize_chunks, api_key, prompt):
    """Chunk a .txt file, optionally summarize every chunk, and save the results as JSON next to it."""
    global summary

    chunks = semantic_chunk_long_file(path, max_chunk_size, chunk_overlap)
    if not chunks:
        logging.error(f"Failed to chunk file {path}")
        return

    chunks_data = {
        "file_path": path,
        "chunk_unit": chunk_unit,
        "max_chunk_size": max_chunk_size,
        "chunk_overlap": chunk_overlap,
        "chunks": [{"chunk_id": index, "text": chunk_text} for index, chunk_text in enumerate(chunks, start=1)],
    }
    summaries_data = {
        "file_path": path,
        "summarization_method": summarize_chunks,
        "summaries": [],
    }

    if summarize_chunks:
        for index, chunk_text in enumerate(chunks, start=1):
            summary = _summarize_chunk(summarize_chunks, api_key, chunk_text, prompt)
            if summary:
                summaries_data["summaries"].append({"chunk_id": index, "summary": summary})
            else:
                logging.warning(f"Failed to generate summary for chunk {index}")

    # Save chunks to a single JSON file
    chunks_file_path = f"{path}_chunks.json"
    with open(chunks_file_path, 'w', encoding='utf-8') as f:
        json.dump(chunks_data, f, ensure_ascii=False, indent=2)
    logging.info(f"All chunks saved to {chunks_file_path}")

    # Save summaries to a single JSON file (if summarization was performed)
    if summarize_chunks:
        summaries_file_path = f"{path}_summaries.json"
        with open(summaries_file_path, 'w', encoding='utf-8') as f:
            json.dump(summaries_data, f, ensure_ascii=False, indent=2)
        logging.info(f"All summaries saved to {summaries_file_path}")

    logging.info(f"File {path} chunked into {len(chunks)} parts using {chunk_unit} as the unit.")


def _process_local_media(path, media_path, download_path, media_info, *, offset, whisper_model, vad_filter,
                         diarize, rolling_summarization, api_name, api_key, prompt, keywords):
    """Ingest or transcribe one local file.

    Returns the transcription dict, or None for text files and unsupported formats.
    """
    global summary, audio_file

    lowered = media_path.lower()
    if lowered.endswith(('.txt', '.md')):
        if lowered.endswith('.txt'):
            # Handle text file ingestion
            logging.info(ingest_text_file(media_path))
        else:
            logging.warning(f"Markdown ingestion is not implemented; skipping {media_path}")
        # Text files have no segments, so there is nothing to summarize or store here.
        return None

    if lowered.endswith(('.mp4', '.avi', '.mov')):
        audio_file, segments = _transcribe_video(media_path, offset, whisper_model, vad_filter, diarize)
    elif lowered.endswith(('.wav', '.mp3', '.m4a')):
        extra_kwargs = {'diarize': True} if diarize else {}
        segments = speech_to_text(media_path, whisper_model=whisper_model, vad_filter=vad_filter, **extra_kwargs)
    else:
        logging.error(f"Unsupported media file format: {media_path}")
        return None

    transcription = {'media_path': path, 'audio_file': media_path, 'transcription': segments}

    summary = _summarize_transcription(transcription, api_name, api_key, prompt, rolling_summarization)
    if summary:
        # Save the summary file in the download_path directory
        _write_summary_file(download_path, media_path, summary)

    add_media_to_database(path, media_info, segments, summary, keywords, prompt, whisper_model)
    return transcription


def main(input_path, api_name=None, api_key=None,
         num_speakers=2,
         whisper_model="small.en",
         offset=0,
         vad_filter=False,
         download_video_flag=False,
         custom_prompt=None,
         overwrite=False,
         rolling_summarization=False,
         detail=0.01,
         keywords=None,
         llm_model=None,
         time_based=False,
         set_chunk_txt_by_words=False,
         set_max_txt_chunk_words=0,
         set_chunk_txt_by_sentences=False,
         set_max_txt_chunk_sentences=0,
         set_chunk_txt_by_paragraphs=False,
         set_max_txt_chunk_paragraphs=0,
         set_chunk_txt_by_tokens=False,
         set_max_txt_chunk_tokens=0,
         ingest_text_file=False,
         chunk=False,
         max_chunk_size=2000,
         chunk_overlap=100,
         chunk_unit='tokens',
         summarize_chunks=None,
         diarize=False
         ):
    """Transcribe, optionally summarize, and store every item named by *input_path*.

    *input_path* is either a single URL/path or an existing file that is read
    with read_paths_from_file(). Each resulting path is handled as one of:

    * a URL (starts with 'http'): downloaded, transcribed, summarized, stored;
    * a '.txt' file when *chunk* is true: chunked (and optionally summarized
      per chunk) into JSON files written next to it;
    * anything else: passed to process_local_file(), which yields either a
      list of URLs or a single local media/text file.

    Fixes relative to the previous implementation:

    * the `ingest_text_file` parameter shadowed the imported function inside
      this body, so text ingestion called a bool; ingestion now happens in a
      helper that sees the module-level function;
    * the effective prompt is *custom_prompt*, falling back to the module
      global `custom_prompt_input` only when that global exists;
    * URL lists are iterated once instead of in a nested N x N loop;
    * summary files are named after the media file, not the repr of a dict;
    * text files no longer fall through to code that needs `segments`;
    * the function returns None instead of raising NameError when nothing
      was transcribed.

    Several parameters (num_speakers, overwrite, detail, llm_model, time_based
    and the set_chunk_txt_* / set_max_txt_chunk_* family) are accepted for
    backward compatibility but are not used by this function.

    Returns:
        [] when *input_path* is empty; otherwise the transcription dict of the
        last successfully transcribed item, or None if there was none.
        Errors for an individual path are logged and do not abort the run.
    """
    global transcription_text, info_dict

    print(f"Keywords: {keywords}")

    if not input_path:
        return []

    prompt = custom_prompt if custom_prompt is not None else globals().get('custom_prompt_input')
    transcription_text = None
    paths = [input_path] if not os.path.isfile(input_path) else read_paths_from_file(input_path)

    shared_options = {
        'offset': offset,
        'whisper_model': whisper_model,
        'vad_filter': vad_filter,
        'diarize': diarize,
        'rolling_summarization': rolling_summarization,
        'api_name': api_name,
        'api_key': api_key,
        'prompt': prompt,
        'keywords': keywords,
    }

    for path in paths:
        try:
            if path.startswith('http'):
                outcome = _process_remote_media(path, download_video_flag=download_video_flag, **shared_options)
                if outcome is not None:
                    transcription_text = outcome

            # FIXME - make sure this doesn't break ingesting multiple videos vs multiple text files
            # FIXME - Need to update so that chunking is fully handled.
            elif chunk and path.lower().endswith('.txt'):
                _chunk_text_file(path, max_chunk_size=max_chunk_size, chunk_overlap=chunk_overlap,
                                 chunk_unit=chunk_unit, summarize_chunks=summarize_chunks,
                                 api_key=api_key, prompt=prompt)

            # Handle downloading of URLs from a text file or processing local video/audio files
            else:
                download_path, info_dict, urls_or_media_file = process_local_file(path)
                if isinstance(urls_or_media_file, list):
                    # Text file containing URLs
                    for url in urls_or_media_file:
                        if not url.startswith(('http://', 'https://')):
                            logging.warning(f"Skipping non-URL entry: {url}")
                            continue
                        outcome = _process_remote_media(url, download_video_flag=download_video_flag,
                                                        **shared_options)
                        if outcome is not None:
                            transcription_text = outcome
                else:
                    # Video or audio or txt file
                    outcome = _process_local_media(path, urls_or_media_file, download_path, info_dict,
                                                   **shared_options)
                    if outcome is not None:
                        transcription_text = outcome

        except Exception as e:
            # Top-level boundary: one bad item must not stop the remaining paths.
            logging.error(f"Error processing {path}: {str(e)}")
            continue

    return transcription_text
658
+
659
+
660
def signal_handler(sig, frame):
    """Log the received signal, run process cleanup, and exit with status 0.

    Installed for SIGINT and SIGTERM by the ``__main__`` block of this file.

    Args:
        sig: Number of the signal that triggered the handler.
        frame: Stack frame supplied by the ``signal`` module (unused here).

    Raises:
        SystemExit: Always, with exit code 0, once cleanup has been called.
    """
    logging.info('Signal handler called with signal: %s', sig)
    # cleanup_process comes from Local_LLM_Inference_Engine_Lib; presumably it
    # stops a locally spawned LLM process -- confirm against that module.
    cleanup_process()
    raise SystemExit(0)
664
+
665
+
666
+ ############################## MAIN ##############################
667
+ #
668
+ #
669
+
670
if __name__ == "__main__":
    # Script entry point. In order: install signal handlers, configure logging,
    # load configuration, parse CLI arguments, then either ingest text files,
    # launch the Gradio UI, or run the CLI transcription/summarization pipeline.

    # Register signal handlers so SIGINT/SIGTERM go through signal_handler
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Logging setup (initial; the level is re-applied from --log_level below)
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # Load Config
    loaded_config_data = load_and_log_configs()

    if loaded_config_data:
        logging.info("Main: Configuration loaded successfully")
        # You can access the configuration data like this:
        # print(f"OpenAI API Key: {config_data['api_keys']['openai']}")
        # print(f"Anthropic Model: {config_data['models']['anthropic']}")
        # print(f"Kobold API IP: {config_data['local_apis']['kobold']['ip']}")
        # print(f"Output Path: {config_data['output_path']}")
        # print(f"Processing Choice: {config_data['processing_choice']}")
    else:
        print("Failed to load configuration")

    # Print ascii_art
    print_hello()

    transcription_text = None

    parser = argparse.ArgumentParser(
        description='Transcribe and summarize videos.',
        epilog='''
Sample commands:
    1. Simple Sample command structure:
        summarize.py <path_to_video> -api openai -k tag_one tag_two tag_three

    2. Rolling Summary Sample command structure:
        summarize.py <path_to_video> -api openai -prompt "custom_prompt_goes_here-is-appended-after-transcription" -roll -detail 0.01 -k tag_one tag_two tag_three

    3. FULL Sample command structure:
        summarize.py <path_to_video> -api openai -ns 2 -wm small.en -off 0 -vad -log INFO -prompt "custom_prompt" -overwrite -roll -detail 0.01 -k tag_one tag_two tag_three

    4. Sample command structure for UI:
        summarize.py -gui -log DEBUG
        ''',
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument('input_path', type=str, help='Path or URL of the video', nargs='?')
    parser.add_argument('-v', '--video', action='store_true', help='Download the video instead of just the audio')
    parser.add_argument('-api', '--api_name', type=str, help='API name for summarization (optional)')
    parser.add_argument('-key', '--api_key', type=str, help='API key for summarization (optional)')
    parser.add_argument('-ns', '--num_speakers', type=int, default=2, help='Number of speakers (default: 2)')
    parser.add_argument('-wm', '--whisper_model', type=str, default='small',
                        help='Whisper model (default: small)| Options: tiny.en, tiny, base.en, base, small.en, small, medium.en, '
                             'medium, large-v1, large-v2, large-v3, large, distil-large-v2, distil-medium.en, '
                             'distil-small.en')
    parser.add_argument('-off', '--offset', type=int, default=0, help='Offset in seconds (default: 0)')
    parser.add_argument('-vad', '--vad_filter', action='store_true', help='Enable VAD filter')
    parser.add_argument('-log', '--log_level', type=str, default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], help='Log level (default: INFO)')
    # default=True means this flag is always set, so the UI branch below is
    # always taken in this (Hugging Face) build.
    parser.add_argument('-gui', '--user_interface', action='store_true', default=True, help="Launch the Gradio user interface")
    parser.add_argument('-demo', '--demo_mode', action='store_true', help='Enable demo mode')
    parser.add_argument('-prompt', '--custom_prompt', type=str,
                        help='Pass in a custom prompt to be used in place of the existing one.\n (Probably should just '
                             'modify the script itself...)')
    parser.add_argument('-overwrite', '--overwrite', action='store_true', help='Overwrite existing files')
    parser.add_argument('-roll', '--rolling_summarization', action='store_true', help='Enable rolling summarization')
    parser.add_argument('-detail', '--detail_level', type=float, help='Mandatory if rolling summarization is enabled, '
                                                                      'defines the chunk size.\n Default is 0.01(lots '
                                                                      'of chunks) -> 1.00 (few chunks)\n Currently '
                                                                      'only OpenAI works. ',
                        default=0.01, )
    parser.add_argument('-model', '--llm_model', type=str, default='',
                        help='Model to use for LLM summarization (only used for vLLM/TabbyAPI)')
    parser.add_argument('-k', '--keywords', nargs='+', default=['cli_ingest_no_tag'],
                        help='Keywords for tagging the media, can use multiple separated by spaces (default: cli_ingest_no_tag)')
    parser.add_argument('--log_file', type=str, help='Where to save logfile (non-default)')
    parser.add_argument('--local_llm', action='store_true',
                        help="Use a local LLM from the script(Downloads llamafile from github and 'mistral-7b-instruct-v0.2.Q8' - 8GB model from Huggingface)")
    parser.add_argument('--server_mode', action='store_true',
                        help='Run in server mode (This exposes the GUI/Server to the network)')
    parser.add_argument('--share_public', type=int, default=7860,
                        help="This will use Gradio's built-in ngrok tunneling to share the server publicly on the internet. Specify the port to use (default: 7860)")
    parser.add_argument('--port', type=int, default=7860, help='Port to run the server on')
    parser.add_argument('--ingest_text_file', action='store_true',
                        help='Ingest .txt files as content instead of treating them as URL lists')
    parser.add_argument('--text_title', type=str, help='Title for the text file being ingested')
    parser.add_argument('--text_author', type=str, help='Author of the text file being ingested')
    parser.add_argument('--diarize', action='store_true', help='Enable speaker diarization')
    # parser.add_argument('--offload', type=int, default=20, help='Numbers of layers to offload to GPU for Llamafile usage')
    # parser.add_argument('-o', '--output_path', type=str, help='Path to save the output file')

    args = parser.parse_args()

    # Set Chunking values/variables (all chunking modes disabled for CLI runs)
    set_chunk_txt_by_words = False
    set_max_txt_chunk_words = 0
    set_chunk_txt_by_sentences = False
    set_max_txt_chunk_sentences = 0
    set_chunk_txt_by_paragraphs = False
    set_max_txt_chunk_paragraphs = 0
    set_chunk_txt_by_tokens = False
    set_max_txt_chunk_tokens = 0

    # Normalise falsy CLI values to None; these stay module-level names.
    share_public = args.share_public or None
    server_mode = True if args.server_mode else None
    server_port = args.port or None

    ########## Logging setup
    logger = logging.getLogger()
    logger.setLevel(getattr(logging, args.log_level))

    # Create console handler
    # NOTE(review): this handler is configured but never added to the logger;
    # basicConfig above has presumably already set up console output -- confirm.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(getattr(logging, args.log_level))
    console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(console_formatter)

    if args.log_file:
        # Create file handler
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setLevel(getattr(logging, args.log_level))
        file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)
        logger.info(f"Log file created at: {args.log_file}")

    # Check if the user wants to use the local LLM from the script
    local_llm = args.local_llm
    logging.info(f'Local LLM flag: {local_llm}')

    # Check if the user wants to ingest a text file (singular or multiple from a folder)
    # NOTE(review): the source this was recovered from had lost its indentation,
    # so the original nesting of sys.exit(0) is uncertain. Here the script exits
    # only after an ingestion was actually performed, so a plain input path still
    # reaches the processing branches below.
    if args.input_path is not None and args.ingest_text_file:
        if os.path.isdir(args.input_path):
            results = ingest_folder(args.input_path, keywords=args.keywords)
            for result in results:
                print(result)
            sys.exit(0)
        elif args.input_path.lower().endswith('.txt'):
            result = ingest_text_file(args.input_path, title=args.text_title, author=args.text_author,
                                      keywords=args.keywords)
            print(result)
            sys.exit(0)

    # Launch the GUI
    # This is huggingface so:
    if args.user_interface:
        if local_llm:
            local_llm_function()
            # Brief pause before opening the browser tab at the local UI address
            time.sleep(2)
            webbrowser.open_new_tab('http://127.0.0.1:7860')
        launch_ui()
    elif not args.input_path:
        parser.print_help()
        sys.exit(1)

    else:
        logging.info('Starting the transcription and summarization process.')
        logging.info(f'Input path: {args.input_path}')
        logging.info(f'API Name: {args.api_name}')
        logging.info(f'Number of speakers: {args.num_speakers}')
        logging.info(f'Whisper model: {args.whisper_model}')
        logging.info(f'Offset: {args.offset}')
        logging.info(f'VAD filter: {args.vad_filter}')
        logging.info(f'Log Level: {args.log_level}')
        logging.info(f'Demo Mode: {args.demo_mode}')
        logging.info(f'Custom Prompt: {args.custom_prompt}')
        logging.info(f'Overwrite: {args.overwrite}')
        logging.info(f'Rolling Summarization: {args.rolling_summarization}')
        logging.info(f'User Interface: {args.user_interface}')
        logging.info(f'Video Download: {args.video}')
        # logging.info(f'Save File location: {args.output_path}')
        # logging.info(f'Log File location: {args.log_file}')

        # Already at module scope here, so no `global` declaration is needed.
        api_name = args.api_name

        ########## Custom Prompt setup
        # Resolve the prompt once so both the module-level name and the
        # attribute on `args` are defined whether or not -prompt was given.
        if not args.custom_prompt:
            logging.debug("No custom prompt defined, will use default")
            custom_prompt_input = (
                "\n\nabove is the transcript of a video. "
                "Please read through the transcript carefully. Identify the main topics that are "
                "discussed over the course of the transcript. Then, summarize the key points about each "
                "main topic in a concise bullet point. The bullet points should cover the key "
                "information conveyed about each topic in the video, but should be much shorter than "
                "the full transcript. Please output your bullet point summary inside <bulletpoints> "
                "tags."
            )
            print("No custom prompt defined, will use default")
        else:
            custom_prompt_input = args.custom_prompt
            logging.debug(f"Custom prompt defined, will use \n\n{custom_prompt_input} \n\nas the prompt")
            print(f"Custom Prompt has been defined. Custom prompt: \n\n {args.custom_prompt}")
        args.custom_prompt_input = custom_prompt_input

        summary = None  # Initialize to ensure it's always defined
        if args.detail_level is None:
            args.detail_level = 0.01

        # FIXME
        # if args.api_name and args.rolling_summarization and any(
        #         key.startswith(args.api_name) and value is not None for key, value in api_keys.items()):
        #     logging.info(f'MAIN: API used: {args.api_name}')
        #     logging.info('MAIN: Rolling Summarization will be performed.')

        if args.api_name:
            logging.info(f'MAIN: API used: {args.api_name}')
            logging.info('MAIN: Summarization (not rolling) will be performed.')
        else:
            logging.info('No API specified. Summarization will not be performed.')

        logging.debug("Platform check being performed...")
        platform_check()
        logging.debug("CUDA check being performed...")
        cuda_check()
        processing_choice = "cpu"
        logging.debug("ffmpeg check being performed...")
        check_ffmpeg()
        # download_ffmpeg()

        llm_model = args.llm_model or None
        # FIXME - dirty hack
        args.time_based = False

        try:
            results = main(args.input_path, api_name=args.api_name, api_key=args.api_key,
                           num_speakers=args.num_speakers, whisper_model=args.whisper_model, offset=args.offset,
                           vad_filter=args.vad_filter, download_video_flag=args.video,
                           custom_prompt=custom_prompt_input,
                           overwrite=args.overwrite, rolling_summarization=args.rolling_summarization,
                           detail=args.detail_level, keywords=args.keywords, llm_model=args.llm_model,
                           time_based=args.time_based, set_chunk_txt_by_words=set_chunk_txt_by_words,
                           set_max_txt_chunk_words=set_max_txt_chunk_words,
                           set_chunk_txt_by_sentences=set_chunk_txt_by_sentences,
                           set_max_txt_chunk_sentences=set_max_txt_chunk_sentences,
                           set_chunk_txt_by_paragraphs=set_chunk_txt_by_paragraphs,
                           set_max_txt_chunk_paragraphs=set_max_txt_chunk_paragraphs,
                           set_chunk_txt_by_tokens=set_chunk_txt_by_tokens,
                           set_max_txt_chunk_tokens=set_max_txt_chunk_tokens)

            logging.info('Transcription process completed.')
            atexit.register(cleanup_process)
        except Exception as e:
            logging.error('An error occurred during the transcription process.')
            logging.error(str(e))
            sys.exit(1)

        finally:
            # Runs on success and on the sys.exit(1) path alike
            cleanup_process()