oceansweep committed on
Commit
2842ee9
1 Parent(s): 370522e

Upload Summarization_General_Lib.py

Browse files
App_Function_Libraries/Summarization_General_Lib.py CHANGED
@@ -20,6 +20,7 @@ import json
20
  import logging
21
  import os
22
  import time
 
23
 
24
  import requests
25
  from requests import RequestException
@@ -30,9 +31,9 @@ from App_Function_Libraries.Chunk_Lib import semantic_chunking, rolling_summariz
30
  from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
31
  from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
32
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
33
- from App_Function_Libraries.DB_Manager import add_media_to_database
34
  # Import Local
35
- from App_Function_Libraries.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
36
  clean_youtube_url, create_download_directory, is_valid_url
37
  from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
38
 
@@ -43,6 +44,55 @@ from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extrac
43
  config = load_comprehensive_config()
44
  openai_api_key = config.get('API', 'openai_api_key', fallback=None)
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def extract_text_from_segments(segments):
47
  logging.debug(f"Segments received: {segments}")
48
  logging.debug(f"Type of segments: {type(segments)}")
@@ -65,18 +115,18 @@ def extract_text_from_segments(segments):
65
 
66
  def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
67
  loaded_config_data = load_and_log_configs()
68
-
69
  try:
70
  # API key validation
71
- if api_key is None or api_key.strip() == "":
72
  logging.info("OpenAI: #1 API key not provided as parameter")
73
  logging.info("OpenAI: Attempting to use API key from config file")
74
  api_key = loaded_config_data['api_keys']['openai']
75
 
76
- if api_key is None or api_key.strip() == "":
77
  logging.error("OpenAI: #2 API key not found or is empty")
78
  return "OpenAI: API Key Not Provided/Found in Config file or is empty"
79
 
 
80
  logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
81
 
82
  # Input data handling
@@ -121,7 +171,6 @@ def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, sys
121
  else:
122
  raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
123
 
124
- openai_model = loaded_config_data['models']['openai'] or "gpt-4o"
125
  logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
126
  logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
127
 
@@ -205,6 +254,7 @@ def summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp=None,
205
  if not anthropic_api_key or not anthropic_api_key.strip():
206
  logging.error("Anthropic: No valid API key available")
207
  # You might want to raise an exception here or handle this case as appropriate for your application
 
208
  # For example: raise ValueError("No valid Anthropic API key available")
209
 
210
 
@@ -344,6 +394,7 @@ def summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp=None, sys
344
  if not cohere_api_key or not cohere_api_key.strip():
345
  logging.error("Cohere: No valid API key available")
346
  # You might want to raise an exception here or handle this case as appropriate for your application
 
347
  # For example: raise ValueError("No valid Anthropic API key available")
348
 
349
  if custom_prompt_arg is None:
@@ -455,6 +506,7 @@ def summarize_with_groq(api_key, input_data, custom_prompt_arg, temp=None, syste
455
  if not groq_api_key or not groq_api_key.strip():
456
  logging.error("Anthropic: No valid API key available")
457
  # You might want to raise an exception here or handle this case as appropriate for your application
 
458
  # For example: raise ValueError("No valid Anthropic API key available")
459
 
460
  logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
@@ -683,6 +735,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
683
  if not huggingface_api_key or not huggingface_api_key.strip():
684
  logging.error("HuggingFace: No valid API key available")
685
  # You might want to raise an exception here or handle this case as appropriate for your application
 
686
  # For example: raise ValueError("No valid Anthropic API key available")
687
 
688
 
@@ -733,7 +786,7 @@ def summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp=None
733
  response = requests.post(API_URL, headers=headers, json=data)
734
 
735
  if response.status_code == 200:
736
- summary = response.json()[0]['summary_text']
737
  logging.debug("huggingface: Summarization successful")
738
  print("Summarization successful.")
739
  return summary
@@ -772,6 +825,7 @@ def summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp=None, s
772
  if not deepseek_api_key or not deepseek_api_key.strip():
773
  logging.error("DeepSeek: No valid API key available")
774
  # You might want to raise an exception here or handle this case as appropriate for your application
 
775
  # For example: raise ValueError("No valid deepseek API key available")
776
 
777
 
@@ -877,6 +931,7 @@ def summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp=None, sy
877
  if not mistral_api_key or not mistral_api_key.strip():
878
  logging.error("Mistral: No valid API key available")
879
  # You might want to raise an exception here or handle this case as appropriate for your application
 
880
  # For example: raise ValueError("No valid deepseek API key available")
881
 
882
 
@@ -1145,39 +1200,14 @@ def save_transcription_and_summary(transcription_text, summary_text, download_pa
1145
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
    """Summarize one chunk of text with the backend selected by ``api_name``.

    ``api_name`` is compared case-insensitively against the supported
    backend names. The matching ``summarize_with_*`` function is called and
    its result returned unchanged.

    Args:
        api_name: Name of the backend (e.g. "openai", "kobold", "vllm").
        text: The text to summarize.
        custom_prompt_input: Prompt forwarded to the backend.
        api_key: API key forwarded to the backends that accept one.
        temp: Temperature forwarded to the backend (default ``None``).
        system_message: System message forwarded to every backend except
            HuggingFace, which is called without it.

    Returns:
        The backend's return value, or ``None`` when ``api_name`` is not
        supported or any exception is raised while dispatching.
    """
    logging.debug("Entered 'summarize_chunk' function")
    try:
        backend = api_name.lower()

        # Each entry is a zero-argument thunk so that only the selected
        # backend function is looked up and called.
        handlers = {
            'openai': lambda: summarize_with_openai(api_key, text, custom_prompt_input, temp, system_message),
            'anthropic': lambda: summarize_with_anthropic(api_key, text, custom_prompt_input, temp, system_message),
            'cohere': lambda: summarize_with_cohere(api_key, text, custom_prompt_input, temp, system_message),
            'groq': lambda: summarize_with_groq(api_key, text, custom_prompt_input, temp, system_message),
            'openrouter': lambda: summarize_with_openrouter(api_key, text, custom_prompt_input, temp, system_message),
            'deepseek': lambda: summarize_with_deepseek(api_key, text, custom_prompt_input, temp, system_message),
            'mistral': lambda: summarize_with_mistral(api_key, text, custom_prompt_input, temp, system_message),
            'llama.cpp': lambda: summarize_with_llama(text, custom_prompt_input, temp, system_message),
            'kobold': lambda: summarize_with_kobold(text, api_key, custom_prompt_input, temp, system_message),
            'ooba': lambda: summarize_with_oobabooga(text, api_key, custom_prompt_input, temp, system_message),
            'tabbyapi': lambda: summarize_with_tabbyapi(text, custom_prompt_input, temp, system_message),
            'vllm': lambda: summarize_with_vllm(text, custom_prompt_input, temp, system_message),
            'local-llm': lambda: summarize_with_local_llm(text, custom_prompt_input, temp, system_message),
            # HuggingFace is intentionally called without system_message.
            'huggingface': lambda: summarize_with_huggingface(api_key, text, custom_prompt_input, temp),
        }

        run_backend = handlers.get(backend)
        if run_backend is None:
            logging.warning(f"Unsupported API: {api_name}")
            return None
        return run_backend()
    except Exception as e:
        logging.error(f"Error in summarize_chunk with {api_name}: {str(e)}")
        return None
1182
 
1183
 
 
20
  import logging
21
  import os
22
  import time
23
+ from typing import Optional
24
 
25
  import requests
26
  from requests import RequestException
 
31
  from App_Function_Libraries.Diarization_Lib import combine_transcription_and_diarization
32
  from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama, summarize_with_kobold, \
33
  summarize_with_oobabooga, summarize_with_tabbyapi, summarize_with_vllm, summarize_with_local_llm
34
+ from App_Function_Libraries.DB.DB_Manager import add_media_to_database
35
  # Import Local
36
+ from App_Function_Libraries.Utils.Utils import load_and_log_configs, load_comprehensive_config, sanitize_filename, \
37
  clean_youtube_url, create_download_directory, is_valid_url
38
  from App_Function_Libraries.Video_DL_Ingestion_Lib import download_video, extract_video_info
39
 
 
44
  config = load_comprehensive_config()
45
  openai_api_key = config.get('API', 'openai_api_key', fallback=None)
46
 
47
+
48
def summarize(
        input_data: str,
        custom_prompt_arg: Optional[str],
        api_name: str,
        api_key: Optional[str],
        temp: Optional[float],
        system_message: Optional[str]
) -> str:
    """Dispatch a summarization request to the backend named by ``api_name``.

    The name is matched case-insensitively. Supported names are: openai,
    anthropic, cohere, groq, huggingface, openrouter, deepseek, mistral,
    llama.cpp, kobold, ooba, tabbyapi, vllm and local-llm.

    Args:
        input_data: The content to summarize, forwarded to the backend.
        custom_prompt_arg: Prompt forwarded to the backend.
        api_name: Name of the backend to use.
        api_key: API key, forwarded to the backends that accept one.
        temp: Temperature, forwarded to every backend except vLLM.
        system_message: System message, forwarded to every backend except
            HuggingFace.

    Returns:
        Whatever the selected backend returns. If ``api_name`` is not a
        string or is not a supported name, returns
        ``"Error: Invalid API Name <api_name>"``. If the backend raises,
        the exception is logged with its traceback and
        ``"Error: <exception text>"`` is returned. This function does not
        raise.
    """
    try:
        logging.debug(f"api_name type: {type(api_name)}, value: {api_name}")

        # Reject non-string names explicitly instead of relying on the
        # AttributeError that ``None.lower()`` would raise.
        if not isinstance(api_name, str):
            return f"Error: Invalid API Name {api_name}"

        provider = api_name.lower()

        # Zero-argument thunks: only the selected backend is looked up and
        # invoked. Each name appears exactly once, so no branch is shadowed.
        dispatch = {
            "openai": lambda: summarize_with_openai(api_key, input_data, custom_prompt_arg, temp, system_message),
            "anthropic": lambda: summarize_with_anthropic(api_key, input_data, custom_prompt_arg, temp, system_message),
            "cohere": lambda: summarize_with_cohere(api_key, input_data, custom_prompt_arg, temp, system_message),
            "groq": lambda: summarize_with_groq(api_key, input_data, custom_prompt_arg, temp, system_message),
            # HuggingFace is called without system_message.
            "huggingface": lambda: summarize_with_huggingface(api_key, input_data, custom_prompt_arg, temp),
            "openrouter": lambda: summarize_with_openrouter(api_key, input_data, custom_prompt_arg, temp, system_message),
            "deepseek": lambda: summarize_with_deepseek(api_key, input_data, custom_prompt_arg, temp, system_message),
            "mistral": lambda: summarize_with_mistral(api_key, input_data, custom_prompt_arg, temp, system_message),
            "llama.cpp": lambda: summarize_with_llama(input_data, custom_prompt_arg, temp, system_message),
            "kobold": lambda: summarize_with_kobold(input_data, api_key, custom_prompt_arg, temp, system_message),
            "ooba": lambda: summarize_with_oobabooga(input_data, api_key, custom_prompt_arg, temp, system_message),
            "tabbyapi": lambda: summarize_with_tabbyapi(input_data, custom_prompt_arg, temp, system_message),
            # NOTE(review): None is passed in the third position instead of
            # temp, exactly as before — confirm against the signature of
            # summarize_with_vllm whether this is intentional.
            "vllm": lambda: summarize_with_vllm(input_data, custom_prompt_arg, None, system_message),
            "local-llm": lambda: summarize_with_local_llm(input_data, custom_prompt_arg, temp, system_message),
        }

        handler = dispatch.get(provider)
        if handler is None:
            return f"Error: Invalid API Name {api_name}"
        return handler()

    except Exception as e:
        logging.error(f"Error in summarize function: {str(e)}", exc_info=True)
        return f"Error: {str(e)}"
94
+
95
+
96
  def extract_text_from_segments(segments):
97
  logging.debug(f"Segments received: {segments}")
98
  logging.debug(f"Type of segments: {type(segments)}")
 
115
 
116
  def summarize_with_openai(api_key, input_data, custom_prompt_arg, temp=None, system_message=None):
117
  loaded_config_data = load_and_log_configs()
 
118
  try:
119
  # API key validation
120
+ if not api_key or api_key.strip() == "":
121
  logging.info("OpenAI: #1 API key not provided as parameter")
122
  logging.info("OpenAI: Attempting to use API key from config file")
123
  api_key = loaded_config_data['api_keys']['openai']
124
 
125
+ if not api_key or api_key.strip() == "":
126
  logging.error("OpenAI: #2 API key not found or is empty")
127
  return "OpenAI: API Key Not Provided/Found in Config file or is empty"
128
 
129
+ openai_api_key = api_key
130
  logging.debug(f"OpenAI: Using API Key: {api_key[:5]}...{api_key[-5:]}")
131
 
132
  # Input data handling
 
171
  else:
172
  raise ValueError(f"OpenAI: Invalid input data format: {type(data)}")
173
 
 
174
  logging.debug(f"OpenAI: Extracted text (first 500 chars): {text[:500]}...")
175
  logging.debug(f"OpenAI: Custom prompt: {custom_prompt_arg}")
176
 
 
254
  if not anthropic_api_key or not anthropic_api_key.strip():
255
  logging.error("Anthropic: No valid API key available")
256
  # You might want to raise an exception here or handle this case as appropriate for your application
257
+ #FIXME
258
  # For example: raise ValueError("No valid Anthropic API key available")
259
 
260
 
 
394
  if not cohere_api_key or not cohere_api_key.strip():
395
  logging.error("Cohere: No valid API key available")
396
  # You might want to raise an exception here or handle this case as appropriate for your application
397
+ # FIXME
398
  # For example: raise ValueError("No valid Anthropic API key available")
399
 
400
  if custom_prompt_arg is None:
 
506
  if not groq_api_key or not groq_api_key.strip():
507
  logging.error("Anthropic: No valid API key available")
508
  # You might want to raise an exception here or handle this case as appropriate for your application
509
+ # FIXME
510
  # For example: raise ValueError("No valid Anthropic API key available")
511
 
512
  logging.debug(f"Groq: Using API Key: {groq_api_key[:5]}...{groq_api_key[-5:]}")
 
735
  if not huggingface_api_key or not huggingface_api_key.strip():
736
  logging.error("HuggingFace: No valid API key available")
737
  # You might want to raise an exception here or handle this case as appropriate for your application
738
+ # FIXME
739
  # For example: raise ValueError("No valid Anthropic API key available")
740
 
741
 
 
786
  response = requests.post(API_URL, headers=headers, json=data)
787
 
788
  if response.status_code == 200:
789
+ summary = response.json()[0]['generated_text'].strip()
790
  logging.debug("huggingface: Summarization successful")
791
  print("Summarization successful.")
792
  return summary
 
825
  if not deepseek_api_key or not deepseek_api_key.strip():
826
  logging.error("DeepSeek: No valid API key available")
827
  # You might want to raise an exception here or handle this case as appropriate for your application
828
+ # FIXME
829
  # For example: raise ValueError("No valid deepseek API key available")
830
 
831
 
 
931
  if not mistral_api_key or not mistral_api_key.strip():
932
  logging.error("Mistral: No valid API key available")
933
  # You might want to raise an exception here or handle this case as appropriate for your application
934
+ # FIXME
935
  # For example: raise ValueError("No valid deepseek API key available")
936
 
937
 
 
1200
def summarize_chunk(api_name, text, custom_prompt_input, api_key, temp=None, system_message=None):
    """Summarize one chunk of text via ``summarize`` and normalize failures to ``None``.

    Args:
        api_name: Name of the backend, forwarded to ``summarize``.
        text: The text to summarize.
        custom_prompt_input: Prompt forwarded to ``summarize``.
        api_key: API key forwarded to ``summarize``.
        temp: Temperature forwarded to ``summarize`` (default ``None``).
        system_message: System message forwarded to ``summarize``
            (default ``None``).

    Returns:
        The value returned by ``summarize`` on success. ``None`` when that
        value is ``None`` or starts with ``"Error:"``, or when any exception
        is raised along the way.
    """
    logging.debug("Entered 'summarize_chunk' function")
    try:
        outcome = summarize(text, custom_prompt_input, api_name, api_key, temp, system_message)

        # A usable summary is anything that is neither None nor an
        # "Error:"-prefixed message produced by summarize().
        succeeded = outcome is not None and not outcome.startswith("Error:")
        if succeeded:
            logging.info(f"Summarization with {api_name} successful")
            return outcome

        logging.warning(f"Summarization with {api_name} failed: {outcome}")
        return None
    except Exception as e:
        logging.error(f"Error in summarize_chunk with {api_name}: {str(e)}", exc_info=True)
        return None
1212
 
1213