fmab777 committed on
Commit 15a941c · verified · 1 Parent(s): 2f0ec37

Update main.py

Files changed (1)
  1. main.py +260 -618
main.py CHANGED
@@ -53,7 +53,6 @@ except ImportError:
53
  # --- Google Gemini ---
54
  try:
55
  import google.generativeai as genai
56
- # Import specific types needed, check library for exact names if errors occur
57
  from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerateContentResponse
58
  _gemini_available = True
59
  except ImportError:
@@ -74,7 +73,6 @@ logging.getLogger('gunicorn.error').setLevel(logging.INFO)
74
  logging.getLogger('uvicorn').setLevel(logging.INFO)
75
  logging.getLogger('starlette').setLevel(logging.INFO)
76
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
77
- # Suppress noisy crawl4ai/playwright logs if needed
78
  logging.getLogger("crawl4ai").setLevel(logging.INFO) # Adjust level as needed
79
  logging.getLogger("playwright").setLevel(logging.WARNING)
80
 
@@ -113,7 +111,6 @@ GEMINI_API_KEY = get_secret('GEMINI_API_KEY') # Primary Summarizer
113
  # Models (User can still configure via env vars)
114
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
115
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
116
- # *** Reverted Model Name as requested ***
117
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Model
118
 
119
  # --- Configuration Checks ---
@@ -159,14 +156,10 @@ if _gemini_primary_enabled:
159
  _gemini_primary_enabled = False
160
 
161
  # --- Constants ---
162
- MAX_SUMMARY_CHUNK_SIZE = 4000 # Max characters per Telegram message (allow buffer)
163
- # Adjust based on gemini-2.0-flash context window if known, 1M was for 1.5 Flash
164
- # Let's use a more conservative estimate for 2.0 Flash, e.g., 128k tokens ~ 500k chars? Check Gemini docs.
165
- # Sticking with a large number for now, but be aware this might need adjustment.
166
- MAX_INPUT_TOKEN_APPROX = 500000
167
 
168
  # --- Retry Decorator ---
169
- # (Remains the same)
170
  @retry(
171
  stop=stop_after_attempt(4),
172
  wait=wait_exponential(multiplier=1, min=2, max=15),
@@ -175,26 +168,20 @@ MAX_INPUT_TOKEN_APPROX = 500000
175
  reraise=True
176
  )
177
  async def retry_bot_operation(func, *args, **kwargs):
 
178
  try:
179
  return await func(*args, **kwargs)
180
  except BadRequest as e:
181
- ignore_errors = [
182
- "message is not modified", "query is too old", "message to edit not found",
183
- "chat not found", "bot was blocked by the user",
184
- ]
185
  if any(err in str(e).lower() for err in ignore_errors):
186
  logger.warning(f"Ignoring non-critical BadRequest: {e}")
187
  return None
188
- # Only raise if it's not an ignored error
189
  logger.error(f"Potentially critical BadRequest: {e}")
190
  raise
191
  except TelegramError as e:
192
- # Log specific transient errors that might trigger retry
193
- if isinstance(e, (TimedOut, NetworkError, RetryAfter)):
194
- logger.warning(f"Telegram transient error (will retry): {e}")
195
- else:
196
- logger.error(f"Unhandled TelegramError: {e}") # Log other Telegram errors
197
- raise # Reraise to allow tenacity to handle retry
198
  except Exception as e:
199
  logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
200
  raise
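For reference, a minimal self-contained sketch of the retry wrapper above. The attempt count and backoff mirror the decorator in this hunk; the retry= condition is not visible here, so the transient-exception list used below is an assumption, and the ignore list is reconstructed from the removed lines.

import logging
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from telegram.error import BadRequest, TelegramError, TimedOut, NetworkError, RetryAfter

logger = logging.getLogger(__name__)

# BadRequest messages that are safe to ignore rather than retry or raise.
IGNORABLE_BAD_REQUESTS = (
    "message is not modified", "query is too old", "message to edit not found",
    "chat not found", "bot was blocked by the user",
)

@retry(
    stop=stop_after_attempt(4),
    wait=wait_exponential(multiplier=1, min=2, max=15),
    retry=retry_if_exception_type((TimedOut, NetworkError, RetryAfter)),  # assumed retry condition
    reraise=True,
)
async def retry_bot_operation(func, *args, **kwargs):
    """Call a bot method, retrying transient errors and ignoring benign BadRequests."""
    try:
        return await func(*args, **kwargs)
    except BadRequest as e:
        if any(err in str(e).lower() for err in IGNORABLE_BAD_REQUESTS):
            logger.warning(f"Ignoring non-critical BadRequest: {e}")
            return None
        raise  # unexpected BadRequest: surface it
    except TelegramError as e:
        logger.warning(f"TelegramError (tenacity may retry): {e}")
        raise

Call sites in this file pass the bound bot method plus its kwargs, e.g. await retry_bot_operation(bot.send_message, chat_id=chat_id, text=text).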
@@ -202,9 +189,11 @@ async def retry_bot_operation(func, *args, **kwargs):
202
  # --- Helper Functions ---
203
  # (is_youtube_url, extract_youtube_id remain the same)
204
  def is_youtube_url(url):
 
205
  youtube_regex = re.compile( r'(?:https?://)?(?:www.)?(?:m.)?(?:youtube(?:-nocookie)?.com|youtu.be)/' r'(?:watch?v=|embed/|v/|shorts/|live/|attribution_link?a=.&u=/watch?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
206
  match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match)
207
  def extract_youtube_id(url):
 
208
  youtube_regex = re.compile( r'(?:https?://)?(?:www.)?(?:m.)?(?:youtube(?:-nocookie)?.com|youtu.be)/' r'(?:watch?v=|embed/|v/|shorts/|live/|attribution_link?a=.&u=/watch?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
209
  match = youtube_regex.search(url)
210
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
@@ -246,7 +235,6 @@ async def get_transcript_via_supadata(video_id: str, api_key: str) -> Optional[s
246
  return None
247
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None
248
 
249
-
250
  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
251
  # ... (Keep existing implementation) ...
252
  global APIFY_ACTOR_ID
@@ -295,7 +283,6 @@ async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[s
295
  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
296
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None
297
 
298
-
299
  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
300
  # ... (Keep existing implementation) ...
301
  global SUPADATA_API_KEY, APIFY_API_TOKEN
@@ -304,11 +291,9 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
304
  transcript_text = None
305
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
306
  try:
307
- # Run the blocking call in a separate thread
308
  transcript_list = await asyncio.to_thread(
309
  YouTubeTranscriptApi.list_transcripts(video_id).find_generated_transcript(['en', 'en-GB', 'en-US']).fetch
310
  )
311
- # transcript_list = await asyncio.to_thread( YouTubeTranscriptApi.get_transcript, video_id, languages=['en', 'en-GB', 'en-US'] ) # Old way
312
  if transcript_list: transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item])
313
  if transcript_text: logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})"); return transcript_text.strip()
314
  else: logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}"); transcript_text = None
@@ -320,14 +305,13 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
320
  transcript_text = None
321
  except Exception as e:
322
  logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}")
323
- # if isinstance(e, (NoTranscriptFound, TranscriptsDisabled)): logger.warning(f"[Primary YT] Known issue: {type(e).__name__}") # Handled above
324
  transcript_text = None
325
 
326
  if transcript_text is None:
327
  logger.info("[Fallback YT 1] Trying Supadata API...")
328
  if SUPADATA_API_KEY:
329
  transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY)
330
- if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text # Already stripped
331
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
332
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")
333
 
@@ -335,12 +319,11 @@ async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]
335
  logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
336
  if APIFY_API_TOKEN:
337
  transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
338
- if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text # Already stripped
339
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
340
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")
341
 
342
  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
343
- # This return was missing, ensuring the final value is returned if fallbacks succeed
344
  return transcript_text
345
 
346
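get_youtube_transcript above tries youtube-transcript-api first, then Supadata, then Apify, returning the first non-empty transcript. A compact, self-contained sketch of that cascade pattern; the real fetchers are the helpers defined earlier in the file, and fetch_via_lib in the commented wiring is a hypothetical stand-in for the youtube-transcript-api call.

import logging
from typing import Awaitable, Callable, Optional

logger = logging.getLogger(__name__)

async def first_successful(
    sources: list[tuple[str, Callable[[], Awaitable[Optional[str]]]]]
) -> Optional[str]:
    # Try each named source in order; return the first non-empty, stripped result.
    for name, fetch in sources:
        try:
            text = await fetch()
        except Exception as e:
            logger.warning(f"{name} failed: {e}")
            continue
        if text and text.strip():
            logger.info(f"{name} succeeded (len={len(text)})")
            return text.strip()
        logger.warning(f"{name} returned no content")
    return None

# Hypothetical wiring mirroring get_youtube_transcript's order of attempts:
# transcript = await first_successful([
#     ("youtube-transcript-api", lambda: fetch_via_lib(video_id)),
#     ("Supadata", lambda: get_transcript_via_supadata(video_id, SUPADATA_API_KEY)),
#     ("Apify SyncItems", lambda: get_transcript_via_apify(video_url, APIFY_API_TOKEN)),
# ])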
 
@@ -358,22 +341,11 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
358
  return None
359
 
360
  logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")
361
- # Define a writable cache directory (use /tmp in container environments)
362
- # No longer creating dir here, relying on HOME=/tmp from Dockerfile
363
- # cache_dir_path = "/tmp/.crawl4ai"
364
- # try:
365
- # os.makedirs(cache_dir_path, exist_ok=True)
366
- # logger.info(f"[Crawl4AI Primary] Ensured cache directory exists: {cache_dir_path}")
367
- # except OSError as e:
368
- # logger.error(f"[Crawl4AI Primary] Failed to create cache directory {cache_dir_path}: {e}. Crawl may fail.")
369
- # except Exception as e:
370
- # logger.error(f"[Crawl4AI Primary] Unexpected error creating cache directory {cache_dir_path}: {e}")
371
 
372
  try:
373
- # Use AsyncWebCrawler context manager.
374
- # Removed explicit cache_dir, relying on HOME=/tmp from Dockerfile
375
- async with AsyncWebCrawler() as crawler:
376
- logger.info(f"[Crawl4AI Primary] Initialized crawler (HOME should be /tmp).")
377
  # Use arun for a single URL crawl
378
  result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout
379
 
@@ -384,14 +356,13 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
384
  return content
385
  else:
386
  logger.warning(f"[Crawl4AI Primary] Crawl successful for {url}, but extracted markdown content is empty.")
387
- # Try result.text as a fallback within crawl4ai success
388
  if result.text:
389
  content = result.text.strip()
390
  if content:
391
  logger.info(f"[Crawl4AI Primary] Using .text fallback after empty markdown. Length: {len(content)}")
392
  return content
393
  return None # Return None if both markdown and text are empty
394
- elif result and result.text: # Fallback to raw text if markdown is somehow missing entirely
395
  content = result.text.strip()
396
  if content:
397
  logger.info(f"[Crawl4AI Primary] Success crawling {url} (using .text, markdown missing). Length: {len(content)}")
@@ -405,13 +376,11 @@ async def get_website_content_via_crawl4ai(url: str) -> Optional[str]:
405
  except asyncio.TimeoutError:
406
  logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
407
  return None
408
- except PermissionError as e: # Catch the specific error
409
- # Log more detail if possible
410
- logger.error(f"[Crawl4AI Primary] Permission denied during crawl for {url}. Target path: '{e.filename}'. Likely filesystem issue or HOME=/tmp not effective. Error: {e}", exc_info=True)
411
- return None # Fail gracefully for this method
412
  except Exception as e:
413
  logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
414
- # Log specific crawl4ai errors if they become apparent
415
  return None
416
 
417
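For context, a minimal sketch of the Crawl4AI primary fetch above. The arun() arguments (crawler_strategy="playwright", timeout=90) and the markdown-then-text preference are taken from the code and log messages in this hunk; the result attribute names are assumed from those messages rather than verified against crawl4ai's documentation.

from typing import Optional
from crawl4ai import AsyncWebCrawler

async def fetch_page_text(url: str) -> Optional[str]:
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90)
    if result and getattr(result, "markdown", None) and result.markdown.strip():
        return result.markdown.strip()  # preferred: cleaned markdown
    if result and getattr(result, "text", None) and result.text.strip():
        return result.text.strip()      # fallback: raw extracted text
    return None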
 
@@ -427,7 +396,7 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
427
  response.raise_for_status()
428
  content_type = response.headers.get('content-type', '').lower()
429
  if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
430
- try: return response.text # Use response.text to let httpx handle decoding
431
  except Exception as e: logger.error(f"[Web Scrape BS4] Error getting response text for {url}: {e}"); return None
432
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
433
  except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")
@@ -437,7 +406,6 @@ async def fetch_url_content_for_scrape(url: str, timeout: int = 25) -> Optional[
437
  return None
438
 
439
  async def get_website_content_bs4(url: str) -> Optional[str]:
440
- """Fetches and parses website content using BeautifulSoup (Fallback 1)."""
441
  # ... (Keep existing implementation) ...
442
  if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
443
  logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")
@@ -445,39 +413,24 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
445
  if not html_content:
446
  logger.warning(f"[BS4 Fallback] Failed to fetch HTML for {url}")
447
  return None
448
-
449
  try:
450
- # Inner function for parsing to use asyncio.to_thread
451
  def parse_html(content):
452
  soup = BeautifulSoup(content, DEFAULT_PARSER)
453
- # Remove common non-content elements
454
- for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]):
455
- element.extract()
456
- # Try to find main content areas more broadly
457
- selectors = ['main', 'article', '[role="main"]', '#content', '.content', '#main-content', '.main-content', '#body', '.body', '#article-body', '.article-body', '.post-content', '.entry-content'] # Added more common selectors
458
  target_element = None
459
  for selector in selectors:
460
  try:
461
  target_element = soup.select_one(selector)
462
  if target_element: break
463
- except Exception as sel_e: # Catch potential invalid CSS selectors from list
464
- logger.warning(f"[BS4 Fallback] Invalid selector '{selector}': {sel_e}")
465
- continue
466
-
467
-
468
- if not target_element: target_element = soup.body # Fallback to body
469
  if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
470
-
471
- # Extract text, clean up whitespace aggressively
472
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
473
- text = " ".join(lines) # Join lines with spaces
474
-
475
- # Basic post-cleaning
476
- text = re.sub(r'\s{2,}', ' ', text).strip() # Replace multiple spaces with single space
477
-
478
  if not text: logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}"); return None
479
  return text
480
-
481
  text_content = await asyncio.to_thread(parse_html, html_content)
482
  if text_content:
483
  logger.info(f"[BS4 Fallback] Success scrape/parse for {url} (final len: {len(text_content)})")
@@ -491,7 +444,6 @@ async def get_website_content_bs4(url: str) -> Optional[str]:
491
 
492
  # Fallback 2: urltotext.com API
493
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
494
- """Fetches website content using urltotext.com API (Fallback 2)."""
495
  # ... (Keep existing implementation) ...
496
  if not url: logger.error("[API Fallback] No URL"); return None
497
  if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None
@@ -513,8 +465,10 @@ async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
513
  else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
514
  except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
515
  except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
516
- elif response.status_code == 402: # Specifically handle insufficient credits
517
- logger.error(f"[API Fallback] Error 402 (Insufficient Credits) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
 
 
518
  elif response.status_code in [400, 401, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
519
  else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
520
  except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
@@ -529,188 +483,150 @@ async def _call_gemini(text: str, summary_type: str) -> Tuple[Optional[str], Opt
529
  logger.error("[Gemini Primary] Called but is disabled.");
530
  return None, "Error: Primary AI service (Gemini) not configured/available."
531
 
532
- # Truncate input text if it exceeds the approximate limit
533
  if len(text) > MAX_INPUT_TOKEN_APPROX:
534
  logger.warning(f"[Gemini Primary] Input text length ({len(text)}) exceeds limit ({MAX_INPUT_TOKEN_APPROX}). Truncating.")
535
  text = text[:MAX_INPUT_TOKEN_APPROX]
536
 
537
  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")
538
 
539
- # Define prompts
540
  if summary_type == "paragraph":
541
- prompt = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information. Avoid unnecessary jargon or overly complex sentences.
542
-
543
- Text to summarise:
544
- ---
545
- {text}
546
- ---
547
-
548
- Concise Paragraph Summary:"""
549
  elif summary_type == "points":
550
- prompt = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea or important piece of information. Aim for clarity and conciseness.
551
-
552
- Text to summarise:
553
- ---
554
- {text}
555
- ---
556
-
557
- Key Bullet Points Summary:"""
558
  else:
559
  logger.error(f"[Gemini Primary] Invalid summary_type: {summary_type}")
560
  return None, f"Error: Invalid summary type '{summary_type}' specified."
561
 
562
- # Configure safety settings (adjust as needed)
563
- safety_settings = {
564
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
565
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
566
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
567
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
568
- }
 
 
 
 
569
 
570
- # Configure generation settings (optional)
571
  generation_config = genai.types.GenerationConfig(
572
- # candidate_count=1, # Default is 1
573
- # stop_sequences=["\n"],
574
- max_output_tokens=2048, # Increased max tokens
575
- temperature=0.7, # Adjust creativity vs factualness
576
- # top_p=1.0, # Default
577
- # top_k=None # Default
578
  )
579
 
580
  try:
581
  model = genai.GenerativeModel(GEMINI_MODEL)
582
  logger.debug(f"[Gemini Primary] Sending request to model {GEMINI_MODEL}")
583
- response: GenerateContentResponse = await model.generate_content_async( # Use async version
584
  prompt,
585
  generation_config=generation_config,
586
  safety_settings=safety_settings,
587
- # request_options={"timeout": 100} # Optional: Add timeout for the API call itself
588
  )
589
 
590
- # Check for safety blocks or other issues in response
591
  if not response.candidates:
592
  block_reason = "Unknown"
 
593
  if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
594
  block_reason = response.prompt_feedback.block_reason or "Reason not specified"
595
- error_msg = f"Error: Gemini response blocked or empty. Reason: {block_reason}"
 
 
596
  logger.error(f"[Gemini Primary] {error_msg}")
597
  return None, error_msg
598
 
599
- # *** FIXED FinishReason Check ***
600
- # Access finish_reason from the first candidate
601
- finish_reason_val = response.candidates[0].finish_reason
602
- logger.debug(f"[Gemini Primary] Received response. Finish reason value: {finish_reason_val}")
603
-
604
- # Convert the finish reason (might be enum/int/string) to uppercase string for reliable comparison
605
- finish_reason_str = str(finish_reason_val).upper()
606
 
607
- # Check if it's NOT a successful or expected non-error finish
608
- # Expected "good" finishes: STOP, MAX_TOKENS
609
- # Problematic finishes: SAFETY, RECITATION, OTHER, etc.
610
- if finish_reason_str not in ["STOP", "MAX_TOKENS"]:
611
- # Log safety ratings if available
612
- safety_ratings_str = "N/A"
613
- if hasattr(response.candidates[0], 'safety_ratings') and response.candidates[0].safety_ratings:
614
- safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in response.candidates[0].safety_ratings])
615
-
616
- error_msg = f"Error: Gemini generation finished unexpectedly. Reason: {finish_reason_str}. Safety: {safety_ratings_str}"
617
- logger.error(f"[Gemini Primary] {error_msg}")
618
 
619
- # Decide how to handle specific non-STOP reasons
620
- if finish_reason_str == "SAFETY":
621
- return None, error_msg # Return specific error for safety blocks
622
- # For RECITATION, OTHER, etc., it's safer to return an error than potentially broken output
623
- return None, f"Error: Gemini generation finished unexpectedly ({finish_reason_str})."
624
 
625
- # Extract text (use response.text shortcut if available and reliable)
626
  summary_text = ""
627
  try:
628
- # response.text might combine parts automatically, safer if available
629
- summary_text = response.text
630
  except ValueError as e:
631
- # Handle cases where .text might raise error (e.g., if blocked, but caught above ideally)
632
  logger.warning(f"[Gemini Primary] Error accessing response.text: {e}. Trying parts manually.")
633
- # Fallback to joining parts if .text fails (less common now)
634
- if response.candidates and response.candidates[0].content and response.candidates[0].content.parts:
635
- summary_text = "".join(part.text for part in response.candidates[0].content.parts)
636
 
637
  if not summary_text or not summary_text.strip():
638
- logger.warning("[Gemini Primary] Gemini returned an empty summary.")
 
639
  return None, "Error: AI generated an empty summary."
640
 
641
- logger.info(f"[Gemini Primary] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason_str}")
642
  return summary_text.strip(), None
643
 
644
  except AttributeError as e:
645
- # Catch potential errors if response structure is different than expected
646
  logger.error(f"[Gemini Primary] Attribute error accessing Gemini response: {e}. Response structure might have changed.", exc_info=True)
647
  return None, f"Error: Failed to parse Gemini response. Details: {e}"
648
  except Exception as e:
649
  logger.error(f"[Gemini Primary] Error during API call to {GEMINI_MODEL}: {e}", exc_info=True)
650
- # Check for specific Google API errors if needed
651
- # from google.api_core import exceptions as google_exceptions
652
- # if isinstance(e, google_exceptions.GoogleAPIError): ...
653
  return None, f"Error: Failed to communicate with the primary AI service (Gemini). Details: {e}"
654
 
655
 
656
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
657
  """ Calls the OpenRouter API to generate a summary. """
658
- # ... (Keep existing implementation - no changes needed here based on logs) ...
659
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
660
  if not _openrouter_fallback_enabled:
661
  logger.error("[OpenRouter Fallback] Called but is disabled.");
662
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."
663
 
664
- max_input_len_openrouter = 100000 # Adjust based on OPENROUTER_MODEL limits if known (Deepseek is large)
665
  if len(text) > max_input_len_openrouter:
666
  logger.warning(f"[OpenRouter Fallback] Input text length ({len(text)}) exceeds approx limit ({max_input_len_openrouter}) for {OPENROUTER_MODEL}. Truncating.")
667
  text = text[:max_input_len_openrouter]
668
 
669
  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")
670
 
671
- # Define prompts (similar structure to Gemini)
672
- if summary_type == "paragraph":
673
- prompt_content = f"""Please summarise the following text into a concise paragraph. Focus on the main points and key information.
674
-
675
- Text:
676
- ---
677
- {text}
678
- ---
679
-
680
- Concise Paragraph Summary:"""
681
- elif summary_type == "points":
682
- prompt_content = f"""Please summarise the following text into a list of key bullet points. Each point should capture a distinct main idea.
683
-
684
- Text:
685
- ---
686
- {text}
687
- ---
688
-
689
- Key Bullet Points Summary:"""
690
- else:
691
- logger.error(f"[OpenRouter Fallback] Invalid summary_type: {summary_type}")
692
- return None, f"Error: Invalid summary type '{summary_type}' specified."
693
-
694
- headers = {
695
- "Authorization": f"Bearer {OPENROUTER_API_KEY}",
696
- "Content-Type": "application/json",
697
- "HTTP-Referer": "https://github.com/fmab777/telegram-summary-bot", # Optional: Identify your app
698
- "X-Title": "Telegram Summary Bot", # Optional: Identify your app
699
- }
700
- payload = {
701
- "model": OPENROUTER_MODEL,
702
- "messages": [
703
- {"role": "system", "content": "You are an expert summarizer. Provide summaries as requested."},
704
- {"role": "user", "content": prompt_content}
705
- ],
706
- "max_tokens": 2048, # Adjust as needed
707
- "temperature": 0.7,
708
- }
709
 
 
 
710
  api_url = "https://openrouter.ai/api/v1/chat/completions"
711
 
712
  try:
713
- async with httpx.AsyncClient(timeout=120.0) as client: # Longer timeout for potentially slower models
714
  logger.debug(f"[OpenRouter Fallback] Sending request to {api_url} for model {OPENROUTER_MODEL}")
715
  response = await client.post(api_url, headers=headers, json=payload)
716
  logger.debug(f"[OpenRouter Fallback] Received status code {response.status_code}")
@@ -721,101 +637,64 @@ Key Bullet Points Summary:"""
721
  if data.get("choices") and len(data["choices"]) > 0:
722
  choice = data["choices"][0]
723
  message = choice.get("message")
724
- finish_reason = choice.get("finish_reason", "N/A") # Get finish reason
725
-
726
  if message and message.get("content"):
727
  summary_text = message["content"].strip()
728
  if summary_text:
729
  logger.info(f"[OpenRouter Fallback] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason}")
730
- # Check for length finish reason?
731
- if finish_reason == 'length':
732
- logger.warning("[OpenRouter Fallback] Summary may be truncated due to max_tokens limit.")
733
  return summary_text, None
734
- else:
735
- logger.warning("[OpenRouter Fallback] OpenRouter returned an empty summary content.")
736
- return None, "Error: Fallback AI generated an empty summary."
737
- else:
738
- logger.error(f"[OpenRouter Fallback] Invalid response structure (missing message/content). Data: {data}")
739
- return None, "Error: Fallback AI returned an invalid response format."
740
  else:
741
  logger.error(f"[OpenRouter Fallback] Invalid response structure (missing choices). Data: {data}")
742
- # Check for error object in response
743
- api_error = data.get("error", {}).get("message", "Unknown API error")
744
- return None, f"Error: Fallback AI response missing summary. API msg: {api_error}"
745
-
746
- except json.JSONDecodeError:
747
- logger.error(f"[OpenRouter Fallback] Failed to decode JSON response. Status: {response.status_code}, Text: {response.text[:500]}")
748
- return None, "Error: Fallback AI sent an invalid JSON response."
749
- except Exception as e:
750
- logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True)
751
- return None, f"Error: Failed to process Fallback AI response. Details: {e}"
752
-
753
  else:
754
- # Handle API errors (rate limits, auth, etc.)
755
  error_message = f"Error: Fallback AI service ({OPENROUTER_MODEL}) returned status {response.status_code}."
756
- try:
757
- error_details = response.json().get("error", {}).get("message", response.text[:200])
758
- error_message += f" Details: {error_details}"
759
- except Exception:
760
- error_message += f" Response: {response.text[:200]}"
761
- logger.error(f"[OpenRouter Fallback] {error_message}")
762
- return None, error_message
763
-
764
- except httpx.TimeoutException:
765
- logger.error(f"[OpenRouter Fallback] Timeout connecting to OpenRouter API for {OPENROUTER_MODEL}")
766
- return None, "Error: Timed out connecting to the fallback AI service."
767
- except httpx.RequestError as e:
768
- logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}")
769
- return None, f"Error: Network error connecting to the fallback AI service. Details: {e}"
770
- except Exception as e:
771
- logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter API call: {e}", exc_info=True)
772
- return None, f"Error: Unexpected issue with the fallback AI service. Details: {e}"
773
 
774
 
775
  async def generate_summary(text: str, summary_type: str) -> str:
776
  """ Generates a summary using the primary AI (Gemini) and falling back to OpenRouter. """
777
- # ... (Keep existing implementation - no changes needed here based on logs) ...
778
  global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
779
  logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
780
- final_summary: Optional[str] = None
781
- error_message: Optional[str] = None # Accumulates errors
782
-
783
- # --- Attempt Primary AI (Gemini) ---
784
  if _gemini_primary_enabled:
785
  logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
786
  primary_summary, primary_error = await _call_gemini(text, summary_type)
787
  if primary_summary:
788
  logger.info(f"[Summary Generation] Success with primary AI (Gemini).")
789
- return primary_summary # Return successful primary summary immediately
790
  else:
791
  logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error}. Proceeding to fallback.")
792
- error_message = f"Primary AI ({GEMINI_MODEL}) failed: {primary_error}" # Store primary error
793
  else:
794
  logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
795
  error_message = "Primary AI (Gemini) unavailable."
796
 
797
- # --- Attempt Fallback AI (OpenRouter) ---
798
  if _openrouter_fallback_enabled:
799
  logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
800
  fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
801
  if fallback_summary:
802
  logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter).")
803
- return fallback_summary # Return successful fallback summary
804
  else:
805
  logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
806
- # Combine errors for final message
807
- if error_message: # If primary also failed
808
- return f"{error_message}\nFallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
809
- else: # Should not happen if logic is correct, but fallback just in case
810
- return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
811
  else:
812
  logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
813
- if error_message: # Primary failed AND fallback disabled
814
- return f"{error_message}\nFallback AI is also unavailable."
815
- else: # Primary disabled AND fallback disabled
816
- return "Error: Both primary and fallback AI services are unavailable."
817
 
818
- # This part should ideally not be reached if the logic above is sound
819
  logger.error("[Summary Generation] Reached end of function unexpectedly. No summary generated.")
820
  final_error = error_message or "Unknown summary generation error."
821
  return f"Sorry, an error occurred: {final_error}"
@@ -831,7 +710,7 @@ async def process_summary_task(
831
  bot_token: str
832
  ) -> None:
833
  """Handles fetching content, generating summary, and sending results."""
834
- # ... (Keep existing implementation - no changes needed here based on logs) ...
835
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"
836
  logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
837
  background_request: Optional[BaseRequest] = None
@@ -840,43 +719,25 @@ async def process_summary_task(
840
  user_feedback_message: Optional[str] = None
841
  success = False
842
  status_message_id = message_id_to_edit # Keep track of the original button message ID
843
- message_to_delete_later_id: Optional[int] = None # For temporary "Processing..." messages
844
 
845
  try:
846
- # Initialize background bot
847
  background_request = HTTPXRequest(connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0)
848
  bot = Bot(token=bot_token, request=background_request)
849
  except Exception as e:
850
- logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True)
851
- # Cannot send feedback without bot
852
- return
853
 
854
  try:
855
- # Send/Edit "Processing..." message
856
- # We *always* edit the original button message first
857
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n{html.escape(url)}\n\nThis might take a moment..."
858
- edited_original_msg = False
859
- if status_message_id: # If we have the ID of the message with buttons
860
  try:
861
- await retry_bot_operation(
862
- bot.edit_message_text,
863
- chat_id=chat_id,
864
- message_id=status_message_id,
865
- text=processing_message_text,
866
- parse_mode=ParseMode.HTML, # Use HTML for escaped URL
867
- reply_markup=None, # Remove buttons
868
- link_preview_options={'is_disabled': True} # Disable preview here too
869
- )
870
  logger.debug(f"[Task {task_id}] Edited original button message {status_message_id} to 'Processing'")
871
- edited_original_msg = True
872
  except Exception as e:
873
- logger.warning(f"[Task {task_id}] Could not edit original button message {status_message_id}: {e}. Will proceed without initial status update on that message.")
874
- # Don't send a new message here, the final result/error will handle it.
875
- # Keep status_message_id so we know the original message still exists.
876
 
877
- # Indicate activity
878
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
879
- except Exception: pass # Non-critical if this fails
880
 
881
  # --- Determine URL Type and Fetch Content ---
882
  is_youtube = is_youtube_url(url)
@@ -884,43 +745,28 @@ async def process_summary_task(
884
 
885
  if is_youtube:
886
  video_id = extract_youtube_id(url)
887
- if video_id:
888
- content = await get_youtube_transcript(video_id, url) # Uses its own internal fallbacks
889
- else:
890
- user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
891
-
892
- if not content and not user_feedback_message:
893
- user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video using any available method (unavailable/private/no captions?)."
894
  else:
895
- # Website URL - New Fetching Logic
896
  logger.info(f"[Task {task_id}] URL is website. Attempting Crawl4AI (Primary)...")
897
  content = await get_website_content_via_crawl4ai(url)
898
-
899
  if not content:
900
- # Log the failure reason from Crawl4AI if available (already logged in the function)
901
- logger.warning(f"[Task {task_id}] Crawl4AI failed for {url}. Attempting BeautifulSoup (Fallback 1)...")
902
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
903
  except Exception: pass
904
  content = await get_website_content_bs4(url)
905
-
906
  if not content:
907
- logger.warning(f"[Task {task_id}] BeautifulSoup also failed for {url}. Attempting API (Fallback 2)...")
908
  global URLTOTEXT_API_KEY, _urltotext_fallback_enabled
909
  if _urltotext_fallback_enabled:
910
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
911
  except Exception: pass
912
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
913
- if not content:
914
- logger.error(f"[Task {task_id}] API fallback (urltotext) also failed for {url}.")
915
- user_feedback_message = "Sorry, I couldn't fetch content from that website using any method (Crawl4AI/BS4 failed, API failed or ran out of credits)." # Updated message
916
- else:
917
- logger.warning(f"[Task {task_id}] API fallback is disabled. Cannot attempt Fallback 2.")
918
- user_feedback_message = "Sorry, I couldn't fetch content from that website using Crawl4AI or BeautifulSoup, and the API fallback is not enabled." # Updated message
919
-
920
- # Final check if all web methods failed
921
- if not content and not user_feedback_message:
922
- logger.error(f"[Task {task_id}] All website fetching methods seem to have failed without setting a specific user message.")
923
- user_feedback_message = "Sorry, I couldn't fetch content from that website using any available method (blocked/inaccessible/empty?)."
924
 
925
 
926
  # --- Generate Summary if Content was Fetched ---
@@ -928,175 +774,99 @@ async def process_summary_task(
928
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary type: '{summary_type}'.")
929
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
930
  except Exception: pass
931
-
932
- final_summary = await generate_summary(content, summary_type) # Calls Gemini -> OpenRouter
933
 
934
  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
935
- user_feedback_message = final_summary # Pass AI error message to user
936
  logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
937
- success = False # Explicitly mark as failure
938
  else:
939
- # Success - Split and Send the summary
940
  summary_parts = []
941
- current_part = ""
942
- # Split respecting newlines, ensure no part exceeds MAX_SUMMARY_CHUNK_SIZE
943
- lines = final_summary.splitlines(keepends=True)
944
  for line in lines:
945
  if len(current_part) + len(line) > MAX_SUMMARY_CHUNK_SIZE:
946
- if current_part.strip(): # Don't add empty parts
947
- summary_parts.append(current_part.strip())
948
- current_part = line
949
- if len(current_part) > MAX_SUMMARY_CHUNK_SIZE:
950
- logger.warning(f"[Task {task_id}] Truncating overly long line in summary.")
951
- current_part = current_part[:MAX_SUMMARY_CHUNK_SIZE]
952
- else:
953
- current_part += line
954
  if current_part.strip(): summary_parts.append(current_part.strip())
955
- if not summary_parts:
956
- summary_parts.append("Summary generated, but it appears to be empty.")
957
- logger.warning(f"[Task {task_id}] Summary was non-empty initially but splitting resulted in zero parts.")
958
 
959
  logger.info(f"[Task {task_id}] Summary generated (orig len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")
960
 
961
- # Try to edit the original button message with the *first* part of the summary
962
  edited_final_result = False
963
- if status_message_id: # If we have the original button message ID
964
  try:
965
- await retry_bot_operation(
966
- bot.edit_message_text,
967
- chat_id=chat_id,
968
- message_id=status_message_id,
969
- text=summary_parts[0],
970
- parse_mode=None, # Send as plain text initially, safer
971
- link_preview_options={'is_disabled': True}
972
- )
973
- logger.debug(f"[Task {task_id}] Edited original button message {status_message_id} with first summary part.")
974
  edited_final_result = True
975
- except Exception as edit_err:
976
- logger.warning(f"[Task {task_id}] Failed to edit original button message {status_message_id} with summary part 1: {edit_err}. Sending as new message.")
977
- # Original message remains as "Processing..." or its initial state if first edit failed
978
-
979
- # If editing failed, or if there was no original message_id (shouldn't happen), send first part as new message
980
  if not edited_final_result:
981
- sent_msg = await retry_bot_operation(
982
- bot.send_message,
983
- chat_id=chat_id,
984
- text=summary_parts[0],
985
- parse_mode=None,
986
- link_preview_options={'is_disabled': True}
987
- )
988
- if not sent_msg:
989
- logger.error(f"[Task {task_id}] Failed to send first summary part even as new message.")
990
- user_feedback_message = "Sorry, failed to send the summary."
991
- success = False
992
-
993
- # Send remaining parts (if any and first part succeeded)
994
- if success and len(summary_parts) > 1: # Check success flag before sending more parts
995
  for i, part in enumerate(summary_parts[1:], start=2):
996
- await asyncio.sleep(0.5) # Small delay between parts
997
  try:
998
- await retry_bot_operation(
999
- bot.send_message,
1000
- chat_id=chat_id,
1001
- text=part,
1002
- parse_mode=None,
1003
- link_preview_options={'is_disabled': True}
1004
- )
1005
  logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
1006
- except Exception as part_err:
1007
- logger.error(f"[Task {task_id}] Failed to send summary part {i}: {part_err}")
1008
- user_feedback_message = f"Sorry, failed to send part {i} of the summary."
1009
- success = False # Mark as failure
1010
- break # Stop sending remaining parts
1011
-
1012
- # Determine overall success based on whether feedback message is set *during this block*
1013
- if not user_feedback_message:
1014
- success = True
1015
-
1016
- # --- Handle Failure Cases (Content Fetching Failed or Summary Failed) ---
1017
- if not success: # Check overall success flag
1018
- if not user_feedback_message: # If success is false but no specific message, set a generic one
1019
- user_feedback_message = "Sorry, an unknown error occurred during processing."
1020
- logger.error(f"[Task {task_id}] Task failed but no specific user_feedback_message was set.")
1021
-
1022
- logger.warning(f"[Task {task_id}] Sending failure/error feedback to user: {user_feedback_message}")
1023
  try:
1024
- # Try editing the original button message with the error
1025
  edited_final_error = False
1026
  if status_message_id:
1027
  try:
1028
- await retry_bot_operation(
1029
- bot.edit_message_text,
1030
- chat_id=chat_id,
1031
- message_id=status_message_id,
1032
- text=user_feedback_message,
1033
- link_preview_options={'is_disabled': True},
1034
- reply_markup=None # Remove buttons
1035
- )
1036
- logger.debug(f"[Task {task_id}] Edited original button message {status_message_id} with failure feedback.")
1037
  edited_final_error = True
1038
- except Exception as edit_err:
1039
- logger.warning(f"[Task {task_id}] Failed to edit original button message {status_message_id} with failure feedback: {edit_err}. Sending as new message.")
1040
-
1041
- # If editing failed or wasn't applicable, send error as a new message
1042
  if not edited_final_error:
1043
- await retry_bot_operation(
1044
- bot.send_message,
1045
- chat_id=chat_id,
1046
- text=user_feedback_message,
1047
- link_preview_options={'is_disabled': True}
1048
- )
1049
  logger.debug(f"[Task {task_id}] Sent failure feedback as new message.")
1050
- except Exception as send_err:
1051
- logger.error(f"[Task {task_id}] Failed even to send failure feedback message: {send_err}")
1052
 
1053
  except Exception as e:
1054
- # Catch-all for unexpected errors during the main processing logic
1055
  logger.error(f"[Task {task_id}] Unexpected error during processing task: {e}", exc_info=True)
1056
- success = False
1057
- user_feedback_message = "Oops! Something went wrong while processing your request. Please try again later."
1058
- if bot: # Ensure bot exists before trying to send
1059
- try:
1060
- # Attempt to send a final error message (maybe edit original if possible?)
1061
- edited_final_crash_error = False
1062
  if status_message_id:
1063
- try:
1064
- await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=user_feedback_message, reply_markup=None, link_preview_options={'is_disabled': True} )
1065
- edited_final_crash_error = True
1066
- except Exception: pass # Ignore error editing here, just try sending new
1067
- if not edited_final_crash_error:
1068
- await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
1069
- except Exception as final_err:
1070
- logger.error(f"[Task {task_id}] Failed to send the final unexpected error feedback: {final_err}")
1071
 
1072
  finally:
1073
- # --- Cleanup ---
1074
- # No automatic deletion needed now. The original message (status_message_id) is either
1075
- # edited with the result/error, or left as is if editing failed. Temporary messages
1076
- # were not introduced in this version of the logic.
1077
-
1078
- # Close the background bot's HTTP client
1079
  if background_request and hasattr(background_request, '_client') and background_request._client:
1080
- try:
1081
- await background_request._client.aclose()
1082
- logger.debug(f"[Task {task_id}] Background bot's HTTPX client closed.")
1083
- except Exception as close_err:
1084
- logger.warning(f"[Task {task_id}] Error closing background bot's client: {close_err}")
1085
-
1086
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
1087
 
1088
 
1089
  # --- Telegram Handlers ---
1090
  # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
1091
- # No changes needed in these handlers based on logs.
1092
-
1093
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
1094
  user = update.effective_user; mention = user.mention_html()
1095
  if not user or not update.message: return
1096
  logger.info(f"User {user.id} ({user.username or 'N/A'}) used /start.")
1097
  await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )
1098
 
1099
  async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
1100
  user = update.effective_user
1101
  if not user or not update.message: return
1102
  logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")
@@ -1104,9 +874,9 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
1104
  "1. Send me any YouTube video link or website URL.\n"
1105
  "2. I'll ask how you want it summarised (paragraph or points).\n"
1106
  "3. Click the button for your choice.\n"
1107
- "4. Wait while I process it!\n\n" # Slightly rephrased
1108
  "⚙️ **Behind the scenes:**\n"
1109
- f"• **Websites:** I try `Crawl4AI` (smart crawl), then `BeautifulSoup` (basic scrape), and `urltotext.com` API (if configured & credits available).\n"
1110
  "• **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if needed.\n"
1111
  f"• **Summaries:** Generated using Google `{GEMINI_MODEL}` (primary) or `{OPENROUTER_MODEL}` (fallback, if configured).\n\n"
1112
  "**Commands:**\n"
@@ -1115,82 +885,51 @@ async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> No
1115
  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
1116
 
1117
  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
1118
  if not update.message or not update.message.text: return
1119
  url = update.message.text.strip(); user = update.effective_user
1120
  if not user: return
1121
- # Basic URL validation
1122
- url_pattern = re.compile(r'https?://[^\s/$.?#].[^\s]*', re.IGNORECASE) # Slightly improved regex
1123
  if not url_pattern.match(url):
1124
  logger.debug(f"Ignoring non-URL from {user.id}: {url}")
1125
- await update.message.reply_text("Hmm, that doesn't look like a valid web URL. Please make sure it starts with `http://` or `https://` and includes a domain.", parse_mode=ParseMode.MARKDOWN)
1126
- return
1127
  logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
1128
- # Store URL and original message ID in user_data
1129
  context.user_data['url_to_summarize'] = url
1130
  context.user_data['original_message_id'] = update.message.message_id
1131
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
1132
  reply_markup = InlineKeyboardMarkup(keyboard)
1133
- escaped_url = html.escape(url) # Escape URL for HTML display
1134
  await update.message.reply_html( f"Okay, I see this link:\n<code>{escaped_url}</code>\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True )
1135
 
1136
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 
1137
  query = update.callback_query
1138
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
1139
  user = query.from_user; summary_type = query.data; query_id = query.id
1140
- try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id} ({user.username or 'N/A'})")
1141
- except Exception as e: logger.warning(f"Error answering callback {query_id}: {e}") # Log warning, don't stop
1142
 
1143
  url = context.user_data.get('url_to_summarize')
1144
- message_id_to_edit = query.message.message_id # The message with the buttons
1145
- logger.info(f"User {user.id} ({user.username or 'N/A'}) chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")
1146
 
1147
  if not url:
1148
  logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
1149
- try:
1150
- await query.edit_message_text(text="Sorry, I seem to have lost the context for that link. 🤔 Please send the URL again.", reply_markup=None)
1151
- except BadRequest as e:
1152
- if "message is not modified" in str(e).lower(): pass # Ignore if text is the same
1153
- else: logger.error(f"Failed edit 'URL not found' msg: {e}")
1154
  except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}")
1155
- return # Do not proceed further
1156
 
1157
- # Check necessary configurations before scheduling
1158
  global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
1159
- if not TELEGRAM_TOKEN:
1160
- logger.critical("TG TOKEN missing! Cannot schedule task.")
1161
- try: await query.edit_message_text(text=" Bot configuration error (Token Missing). Cannot proceed.", reply_markup=None)
1162
- except Exception: pass
1163
- return
1164
- if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
1165
- logger.critical("Neither Gemini nor OpenRouter API keys are configured/valid! Cannot summarize.")
1166
- try: await query.edit_message_text(text="❌ AI configuration error: No summarization models are available. Cannot proceed.", reply_markup=None)
1167
- except Exception: pass
1168
- return
1169
- elif not _gemini_primary_enabled: logger.warning("Primary AI (Gemini) is unavailable, will rely on fallback (OpenRouter).")
1170
- elif not _openrouter_fallback_enabled: logger.warning("Fallback AI (OpenRouter) is unavailable, relying on primary (Gemini).")
1171
 
1172
- # Schedule the background task
1173
  logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit} (URL: {url})")
1174
- # Use asyncio.ensure_future for slightly better practice than create_task directly here
1175
- asyncio.ensure_future(
1176
- process_summary_task(
1177
- user_id=user.id,
1178
- chat_id=query.message.chat_id,
1179
- message_id_to_edit=message_id_to_edit, # Pass the ID of the message with buttons
1180
- url=url,
1181
- summary_type=summary_type,
1182
- bot_token=TELEGRAM_TOKEN # Pass token explicitly
1183
- )
1184
- # Consider adding .set_name(...) if using Python 3.8+ for better debugging
1185
- # .set_name(f"SummaryTask-{user.id}-{message_id_to_edit}")
1186
- )
1187
-
1188
- # Clear context AFTER scheduling the task
1189
  context.user_data.pop('url_to_summarize', None)
1190
  context.user_data.pop('original_message_id', None)
1191
- logger.debug(f"Cleared URL context for user {user.id} after scheduling task.")
1192
-
1193
- # Do not edit the message here - let the task handle its own status updates by editing this message
1194
 
1195
  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
1196
  # ... (Keep existing implementation) ...
@@ -1211,7 +950,6 @@ async def setup_bot_config() -> Application:
1211
  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
1212
  custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
1213
  application = ( Application.builder() .token(TELEGRAM_TOKEN) .request(custom_request) .build() )
1214
- # Add Handlers
1215
  application.add_handler(CommandHandler("start", start))
1216
  application.add_handler(CommandHandler("help", help_command))
1217
  url_filter = filters.TEXT & ~filters.COMMAND & (filters.Entity("url") | filters.Entity("text_link") | filters.Regex(r'https?://[^\s]+'))
@@ -1221,21 +959,22 @@ async def setup_bot_config() -> Application:
1221
  logger.info("Telegram application handlers configured."); return application
1222
 
1223
  # --- ASGI Lifespan & Routes ---
1224
- # (lifespan, telegram_webhook remain the same)
1225
  @contextlib.asynccontextmanager
1226
  async def lifespan(app: Starlette):
  global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
  logger.info("ASGI Lifespan: Startup initiated...");
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
  try:
  ptb_app = await setup_bot_config()
- await ptb_app.initialize() # This sets the _initialized flag
  bot_info = await ptb_app.bot.get_me()
  logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")

- # Webhook setup logic (remains the same)
  current_webhook_info = await ptb_app.bot.get_webhook_info()
- webhook_delete_success = True # Assume success unless proven otherwise
  if current_webhook_info and current_webhook_info.url:
  logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
  try:
@@ -1243,212 +982,115 @@ async def lifespan(app: Starlette):
  else: logger.warning("Failed delete webhook (API returned False)."); webhook_delete_success = False
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1); webhook_delete_success = False

- # Determine Webhook URL
  space_host = os.environ.get("SPACE_HOST")
- webhook_path = "/webhook" # Standard path
  full_webhook_url = None
  if space_host:
- protocol = "https"
- host = space_host.split('://')[-1]
- full_webhook_url = f"{protocol}://{host.rstrip('/')}{webhook_path}"
  logger.info(f"Using SPACE_HOST to determine webhook URL: {full_webhook_url}")
- else:
- logger.critical("Could not construct webhook URL (SPACE_HOST env var missing or invalid).")
- raise RuntimeError("Webhook URL undetermined.")

- # Set Webhook
  if full_webhook_url and webhook_delete_success:
  logger.info(f"Attempting to set webhook: {full_webhook_url}")
- set_webhook_args = {
- "url": full_webhook_url,
- "allowed_updates": Update.ALL_TYPES,
- "drop_pending_updates": True
- }
- if WEBHOOK_SECRET:
- set_webhook_args["secret_token"] = WEBHOOK_SECRET
- logger.info("Webhook secret token will be used.")
-
- await asyncio.sleep(1.0) # Short delay before setting
-
  try:
  webhook_set = await ptb_app.bot.set_webhook(**set_webhook_args)
- if not webhook_set:
- raise RuntimeError("set_webhook API call returned False.")
- await asyncio.sleep(1.5) # Longer delay after setting before verification
  webhook_info = await ptb_app.bot.get_webhook_info()
  if webhook_info and webhook_info.url == full_webhook_url:
  logger.info(f"Webhook set and verified successfully: URL='{webhook_info.url}', Secret={'YES' if WEBHOOK_SECRET else 'NO'}")
- if webhook_info.last_error_message:
- logger.warning(f"Webhook status has last error: {webhook_info.last_error_message}")
- else:
- logger.error(f"Webhook verification failed! Expected '{full_webhook_url}', Got info: {webhook_info}")
- raise RuntimeError("Webhook verification failed after setting.")
-
- await ptb_app.start() # Start PTB processing updates AFTER successful webhook setup
  logger.info("PTB Application started (webhook mode).")
- except Exception as e:
- logger.error(f"FATAL: Failed to set or verify webhook: {e}", exc_info=True)
- raise RuntimeError(f"Failed to set/verify webhook: {e}") from e
- elif not webhook_delete_success:
- logger.error("FATAL: Failed to delete previous webhook. Cannot set new one.")
- raise RuntimeError("Failed to delete previous webhook.")
- # else: # Already handled by raising error if full_webhook_url is None

- logger.info("ASGI Lifespan: Startup complete.");
- yield # Application runs here

  except Exception as startup_err:
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
- # Ensure cleanup happens even if startup fails partially
  if ptb_app:
  try:
  if ptb_app.running: await ptb_app.stop()
- # Check if initialized before shutting down
  if ptb_app._initialized: await ptb_app.shutdown()
- except Exception as shutdown_err:
- logger.error(f"Error during shutdown after startup failure: {shutdown_err}")
- raise # Reraise the original startup error
-
  finally:
- # Shutdown sequence
  logger.info("ASGI Lifespan: Shutdown initiated...")
  if ptb_app:
  try:
- if ptb_app.running:
- logger.info("Stopping PTB application...")
- await ptb_app.stop()
- # Check if initialized before shutting down
- if ptb_app._initialized:
- logger.info("Shutting down PTB application...")
- await ptb_app.shutdown()
- logger.info("PTB Application shut down successfully.")
- else:
- logger.info("PTB Application was not fully initialized, skipping shutdown.")
- except Exception as e:
- logger.error(f"Error during PTB shutdown: {e}", exc_info=True)
- else:
- logger.info("PTB application object not created.")
  logger.info("ASGI Lifespan: Shutdown complete.")

-
  async def health_check(request: Request) -> PlainTextResponse:
  global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled, SUPADATA_API_KEY
- bot_status = "Not Initialized"
- bot_username = "N/A"
- # *** FIXED Attribute Name ***
- if ptb_app and ptb_app.bot and ptb_app._initialized: # Check if initialized using _initialized
  try:
- # Quick check if webhook seems ok, more reliable than get_me() sometimes
  wh_info = await ptb_app.bot.get_webhook_info()
- # Check running status as well
  if ptb_app.running and wh_info and wh_info.url:
- bot_info = await ptb_app.bot.get_me()
- bot_username = f"@{bot_info.username}"
  bot_status = f"Running (Webhook OK, {bot_username})"
- elif ptb_app.running:
- bot_status = f"Running (Webhook Status: {wh_info.url if wh_info else 'N/A'}, Last Error: {wh_info.last_error_message if wh_info else 'N/A'})"
  else: bot_status = "Initialized/Not running"
- except Exception as e:
- logger.error(f"Error checking bot status in health check: {e}", exc_info=True)
- bot_status = f"Error checking status: {e}"
- elif ptb_app:
- bot_status = "Initializing..." # Or Initialized but not running yet

  health_info = [
- f"=== Telegram Summary Bot Status ===",
- f"Bot Application: {bot_status}",
- "--- Services ---",
- f"Primary Web Scraper: {'Crawl4AI' if _crawl4ai_primary_web_enabled else 'DISABLED (Lib Missing)'}",
  f"Fallback Web Scraper 1: BeautifulSoup",
  f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else 'DISABLED (No Key)'}",
  f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else 'DISABLED (No Key/Lib)'}",
  f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else 'DISABLED (No Key)'}",
  f"Primary YT Transcript: youtube-transcript-api",
  f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else 'DISABLED (No Key)'}",
- f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else 'DISABLED (No Key)'}"
- ]
  return PlainTextResponse("\n".join(health_info))

-
  async def telegram_webhook(request: Request) -> Response:
  # ... (Keep existing implementation) ...
  global WEBHOOK_SECRET, ptb_app
- if not ptb_app:
- logger.error("Webhook received but PTB application not initialized.")
- return PlainTextResponse('Bot not initialized', status_code=503) # Service Unavailable
- if not ptb_app._initialized: # Check _initialized flag
- logger.error("Webhook received but PTB application not running (not initialized).")
- return PlainTextResponse('Bot not initialized', status_code=503)
- if not ptb_app.running:
- logger.warning("Webhook received but PTB application not running.")
- return PlainTextResponse('Bot not running', status_code=503) # Service Unavailable
-
- # Validate webhook secret if configured
  if WEBHOOK_SECRET:
  token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
- if token_header != WEBHOOK_SECRET:
- logger.warning(f"Webhook received with invalid secret token. Header: '{token_header}'")
- return Response(content="Invalid secret token", status_code=403) # Forbidden

  try:
- # Process update
  update_data = await request.json()
  update = Update.de_json(data=update_data, bot=ptb_app.bot)
  logger.debug(f"Processing update_id: {update.update_id} via webhook")
- # Use process_update which handles tasks in the background
  await ptb_app.process_update(update)
- return Response(status_code=200) # OK - Acknowledge receipt quickly
-
- except json.JSONDecodeError:
- logger.error("Webhook received invalid JSON.")
- return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
- except Exception as e:
- # Log error but return OK to Telegram to prevent retries for processing errors
- logger.error(f"Error processing webhook update: {e}", exc_info=True)
- return Response(status_code=200) # OK


  # --- ASGI App Definition ---
- # (Remains the same)
- app = Starlette(
- debug=False, # Set to True for more verbose errors during development if needed
- lifespan=lifespan,
- routes=[
- Route("/", endpoint=health_check, methods=["GET"]),
- Route("/webhook", endpoint=telegram_webhook, methods=["POST"]),
- ]
- )
  logger.info("Starlette ASGI application created with native routes.")


  # --- Development Runner ---
- # (Remains the same)
  if __name__ == '__main__':
  import uvicorn
  logger.warning("Running in development mode using Uvicorn directly - FOR LOCAL TESTING ONLY")
  log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
- # Use the PORT env var for local running too, defaulting to 8080
  local_port = int(os.environ.get('PORT', 8080))
-
- # Make sure necessary env vars are loaded for local dev if not set system-wide
- # Example using python-dotenv if you add it to requirements-dev.txt
- try:
- from dotenv import load_dotenv
- load_dotenv()
- logger.info("Loaded environment variables from .env file for local development.")
- except ImportError:
- logger.info(".env file not found or python-dotenv not installed, using system environment variables.")
-
- # Re-check required tokens after potential .env load
- if not get_secret('TELEGRAM_TOKEN'): logger.critical("Local Dev: TELEGRAM_TOKEN not found.")
- if not get_secret('GEMINI_API_KEY'): logger.error("Local Dev: GEMINI_API_KEY not found.")
- # Add checks for other keys as needed for local testing
-
- uvicorn.run(
- "main:app",
- host='0.0.0.0',
- port=local_port,
- log_level=log_level,
- reload=True # Enable auto-reload for development
- )
 
  # --- Google Gemini ---
  try:
  import google.generativeai as genai
  from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerateContentResponse
  _gemini_available = True
  except ImportError:

  logging.getLogger('uvicorn').setLevel(logging.INFO)
  logging.getLogger('starlette').setLevel(logging.INFO)
  if _gemini_available: logging.getLogger("google.ai.generativelanguage").setLevel(logging.WARNING)
  logging.getLogger("crawl4ai").setLevel(logging.INFO) # Adjust level as needed
  logging.getLogger("playwright").setLevel(logging.WARNING)

  # Models (User can still configure via env vars)
  OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "deepseek/deepseek-chat-v3-0324:free") # Fallback Model
  APIFY_ACTOR_ID = os.environ.get("APIFY_ACTOR_ID", "karamelo~youtube-transcripts")
  GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash-001") # Primary Model

  # --- Configuration Checks ---

  _gemini_primary_enabled = False

  # --- Constants ---
+ MAX_SUMMARY_CHUNK_SIZE = 4000
+ MAX_INPUT_TOKEN_APPROX = 500000 # Keep conservative estimate for gemini-2.0-flash
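A rough reading of the constant above (heuristic only, not part of main.py): _call_gemini compares it against character count, and ~4 characters per token is an assumed ratio, not an exact tokenizer figure.

# Illustrative back-of-the-envelope check, assuming ~4 characters per token
approx_tokens = MAX_INPUT_TOKEN_APPROX / 4  # 500,000 chars ≈ 125,000 tokens
assert approx_tokens == 125_000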
 
 
 
  # --- Retry Decorator ---
  @retry(
  stop=stop_after_attempt(4),
  wait=wait_exponential(multiplier=1, min=2, max=15),
  reraise=True
  )
  async def retry_bot_operation(func, *args, **kwargs):
+ # ... (Keep existing implementation) ...
  try:
  return await func(*args, **kwargs)
  except BadRequest as e:
+ ignore_errors = [ "message is not modified", "query is too old", "message to edit not found", "chat not found", "bot was blocked by the user", ]
  if any(err in str(e).lower() for err in ignore_errors):
  logger.warning(f"Ignoring non-critical BadRequest: {e}")
  return None
  logger.error(f"Potentially critical BadRequest: {e}")
  raise
  except TelegramError as e:
+ if isinstance(e, (TimedOut, NetworkError, RetryAfter)): logger.warning(f"Telegram transient error (will retry): {e}")
+ else: logger.error(f"Unhandled TelegramError: {e}")
+ raise
  except Exception as e:
  logger.error(f"Unexpected error during bot operation: {e}", exc_info=True)
  raise
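For reference, a minimal sketch of how this wrapper is invoked by the task code further down (the bot/chat_id values are placeholders; the real call sites wrap bot.edit_message_text and bot.send_message in exactly this way):

async def _example_send(bot, chat_id: int) -> None:
    # Transient TimedOut/NetworkError/RetryAfter failures are retried up to 4 times;
    # benign BadRequests (e.g. "message is not modified") are swallowed and return None.
    await retry_bot_operation(bot.send_message, chat_id=chat_id, text="Example message")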
 
  # --- Helper Functions ---
  # (is_youtube_url, extract_youtube_id remain the same)
  def is_youtube_url(url):
+ # ... (Keep existing implementation) ...
  youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
  match = youtube_regex.search(url); logger.debug(f"is_youtube_url '{url}': {bool(match)}"); return bool(match)
  def extract_youtube_id(url):
+ # ... (Keep existing implementation) ...
  youtube_regex = re.compile( r'(?:https?://)?(?:www\.)?(?:m\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)/' r'(?:watch\?v=|embed/|v/|shorts/|live/|attribution_link\?a=.&u=/watch\?v=)?' r'([\w-]{11})' r'(?:\S+)?', re.IGNORECASE)
  match = youtube_regex.search(url)
  if match: video_id = match.group(1); logger.debug(f"Extracted YT ID '{video_id}' from {url}"); return video_id
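A quick illustrative check of the two helpers above (not part of main.py; the 11-character id is the usual well-known example id):

assert is_youtube_url("https://www.youtube.com/shorts/dQw4w9WgXcQ")
assert extract_youtube_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"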
 
  return None
  except Exception as e: logger.error(f"[Supadata] Unexpected error for {video_id}: {e}", exc_info=True); return None

  async def get_transcript_via_apify(video_url: str, api_token: str) -> Optional[str]:
  # ... (Keep existing implementation) ...
  global APIFY_ACTOR_ID

  except httpx.RequestError as e: logger.error(f"[Apify SyncItems] Request error during API interaction for {video_url}: {e}"); return None
  except Exception as e: logger.error(f"[Apify SyncItems] Unexpected error during Apify SyncItems REST call for {video_url}: {e}", exc_info=True); return None

  async def get_youtube_transcript(video_id: str, video_url: str) -> Optional[str]:
  # ... (Keep existing implementation) ...
  global SUPADATA_API_KEY, APIFY_API_TOKEN

  transcript_text = None
  logger.info("[Primary YT] Attempting youtube-transcript-api...")
  try:
  transcript_list = await asyncio.to_thread(
  YouTubeTranscriptApi.list_transcripts(video_id).find_generated_transcript(['en', 'en-GB', 'en-US']).fetch
  )
  if transcript_list: transcript_text = " ".join([item['text'] for item in transcript_list if 'text' in item])
  if transcript_text: logger.info(f"[Primary YT] Success via lib for {video_id} (len: {len(transcript_text)})"); return transcript_text.strip()
  else: logger.warning(f"[Primary YT] Transcript list/text empty for {video_id}"); transcript_text = None

  transcript_text = None
  except Exception as e:
  logger.warning(f"[Primary YT] Error via lib for {video_id}: {e}")
  transcript_text = None

  if transcript_text is None:
  logger.info("[Fallback YT 1] Trying Supadata API...")
  if SUPADATA_API_KEY:
  transcript_text = await get_transcript_via_supadata(video_id, SUPADATA_API_KEY)
+ if transcript_text: logger.info(f"[Fallback YT 1] Success via Supadata for {video_id}"); return transcript_text
  else: logger.warning(f"[Fallback YT 1] Supadata failed or no content for {video_id}.")
  else: logger.warning("[Fallback YT 1] Supadata API key unavailable. Skipping.")

  logger.info("[Fallback YT 2] Trying Apify REST API (SyncItems)...")
  if APIFY_API_TOKEN:
  transcript_text = await get_transcript_via_apify(video_url, APIFY_API_TOKEN)
+ if transcript_text: logger.info(f"[Fallback YT 2] Success via Apify SyncItems REST for {video_url}"); return transcript_text
  else: logger.warning(f"[Fallback YT 2] Apify SyncItems REST failed or no content for {video_url}.")
  else: logger.warning("[Fallback YT 2] Apify API token unavailable. Skipping.")

  if transcript_text is None: logger.error(f"All methods failed for YT transcript: {video_id}"); return None
  return transcript_text
 
  return None

  logger.info(f"[Crawl4AI Primary] Attempting to crawl URL: {url}")

  try:
+ # *** MODIFIED: Instantiate with ignore_robots=True to bypass cache creation ***
+ async with AsyncWebCrawler(ignore_robots=True) as crawler:
+ logger.info(f"[Crawl4AI Primary] Initialized crawler (ignore_robots=True).")
  # Use arun for a single URL crawl
  result = await crawler.arun(url=url, crawler_strategy="playwright", timeout=90) # 90 sec timeout

  return content
  else:
  logger.warning(f"[Crawl4AI Primary] Crawl successful for {url}, but extracted markdown content is empty.")
  if result.text:
  content = result.text.strip()
  if content:
  logger.info(f"[Crawl4AI Primary] Using .text fallback after empty markdown. Length: {len(content)}")
  return content
  return None # Return None if both markdown and text are empty
+ elif result and result.text:
  content = result.text.strip()
  if content:
  logger.info(f"[Crawl4AI Primary] Success crawling {url} (using .text, markdown missing). Length: {len(content)}")

  except asyncio.TimeoutError:
  logger.error(f"[Crawl4AI Primary] Timeout occurred while crawling {url}")
  return None
+ except PermissionError as e: # Should hopefully not happen now, but keep for logging
+ logger.error(f"[Crawl4AI Primary] Permission denied during crawl for {url}. Target path: '{e.filename}'. Error: {e}", exc_info=True)
+ return None
  except Exception as e:
  logger.error(f"[Crawl4AI Primary] Unexpected error during crawl for {url}: {e}", exc_info=True)
  return None
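An illustrative way to exercise the primary scraper above in isolation (not part of main.py; assumes the Crawl4AI/Playwright dependencies are installed and uses a placeholder URL):

import asyncio
content = asyncio.run(get_website_content_via_crawl4ai("https://example.com"))
print(len(content) if content else "no content extracted")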
  response.raise_for_status()
  content_type = response.headers.get('content-type', '').lower()
  if 'html' not in content_type: logger.warning(f"[Web Scrape BS4] Non-HTML content type from {url}: {content_type}"); return None
+ try: return response.text
  except Exception as e: logger.error(f"[Web Scrape BS4] Error getting response text for {url}: {e}"); return None
  except httpx.HTTPStatusError as e: logger.error(f"[Web Scrape BS4] HTTP error {e.response.status_code} fetching {url}: {e}")
  except httpx.TimeoutException: logger.error(f"[Web Scrape BS4] Timeout error fetching {url}")

  return None

  async def get_website_content_bs4(url: str) -> Optional[str]:
  # ... (Keep existing implementation) ...
  if not url: logger.error("[BS4 Fallback] get_website_content_bs4: No URL"); return None
  logger.info(f"[BS4 Fallback] Attempting basic fetch & parse for: {url}")

  if not html_content:
  logger.warning(f"[BS4 Fallback] Failed to fetch HTML for {url}")
  return None
  try:
  def parse_html(content):
  soup = BeautifulSoup(content, DEFAULT_PARSER)
+ for element in soup(["script", "style", "header", "footer", "nav", "aside", "form", "button", "input", "iframe", "img", "svg", "link", "meta", "noscript", "figure", "figcaption", "video", "audio"]): element.extract()
+ selectors = ['main', 'article', '[role="main"]', '#content', '.content', '#main-content', '.main-content', '#body', '.body', '#article-body', '.article-body', '.post-content', '.entry-content']
  target_element = None
  for selector in selectors:
  try:
  target_element = soup.select_one(selector)
  if target_element: break
+ except Exception as sel_e: logger.warning(f"[BS4 Fallback] Invalid selector '{selector}': {sel_e}"); continue
+ if not target_element: target_element = soup.body
  if not target_element: logger.warning(f"[BS4 Fallback] Could not find body/main for parsing {url}"); return None
  lines = [line.strip() for line in target_element.get_text(separator='\n', strip=True).splitlines() if line.strip()]
+ text = " ".join(lines)
+ text = re.sub(r'\s{2,}', ' ', text).strip()
  if not text: logger.warning(f"[BS4 Fallback] Extracted text is empty after cleaning for {url}"); return None
  return text
  text_content = await asyncio.to_thread(parse_html, html_content)
  if text_content:
  logger.info(f"[BS4 Fallback] Success scrape/parse for {url} (final len: {len(text_content)})")

  # Fallback 2: urltotext.com API
  async def get_website_content_via_api(url: str, api_key: str) -> Optional[str]:
  # ... (Keep existing implementation) ...
  if not url: logger.error("[API Fallback] No URL"); return None
  if not api_key: logger.error("[API Fallback] urltotext.com API key missing."); return None

  else: logger.warning(f"[API Fallback] urltotext.com API success but content empty for {url}. Resp: {data}"); return None
  except json.JSONDecodeError: logger.error(f"[API Fallback] Failed JSON decode urltotext.com for {url}. Resp:{response.text[:500]}"); return None
  except Exception as e: logger.error(f"[API Fallback] Error processing urltotext.com success response for {url}: {e}", exc_info=True); return None
+ elif response.status_code == 402: logger.error(f"[API Fallback] Error 402 (Insufficient Credits) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
+ # Added check for 400 specifically mentioning bad URL
+ elif response.status_code == 400 and "url" in response.text.lower():
+ logger.error(f"[API Fallback] Error 400 (Likely Bad URL) from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
  elif response.status_code in [400, 401, 403, 422, 500]: logger.error(f"[API Fallback] Error {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
  else: logger.error(f"[API Fallback] Unexpected status {response.status_code} from urltotext.com API for {url}. Resp:{response.text[:200]}"); return None
  except httpx.TimeoutException: logger.error(f"[API Fallback] Timeout connecting to urltotext.com API for {url}"); return None
 
  logger.error("[Gemini Primary] Called but is disabled.");
  return None, "Error: Primary AI service (Gemini) not configured/available."

  if len(text) > MAX_INPUT_TOKEN_APPROX:
  logger.warning(f"[Gemini Primary] Input text length ({len(text)}) exceeds limit ({MAX_INPUT_TOKEN_APPROX}). Truncating.")
  text = text[:MAX_INPUT_TOKEN_APPROX]

  logger.info(f"[Gemini Primary] Generating {summary_type} summary using {GEMINI_MODEL}. Input length: {len(text)}")

  if summary_type == "paragraph":
+ prompt = f"""Please summarise the following text into a concise paragraph...""" # Keep prompt
  elif summary_type == "points":
+ prompt = f"""Please summarise the following text into a list of key bullet points...""" # Keep prompt
  else:
  logger.error(f"[Gemini Primary] Invalid summary_type: {summary_type}")
  return None, f"Error: Invalid summary type '{summary_type}' specified."

+ # *** MODIFIED: Disable safety settings as requested ***
+ safety_settings = { category: HarmBlockThreshold.BLOCK_NONE for category in HarmCategory }
+ # safety_settings = {
+ # HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+ # HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+ # HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+ # HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+ # # HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: HarmBlockThreshold.BLOCK_NONE # If supported by model
+ # }
+ logger.info("[Gemini Primary] Safety settings disabled (BLOCK_NONE).")
+
  generation_config = genai.types.GenerationConfig(
+ max_output_tokens=2048,
+ temperature=0.7,
  )

  try:
  model = genai.GenerativeModel(GEMINI_MODEL)
  logger.debug(f"[Gemini Primary] Sending request to model {GEMINI_MODEL}")
+ response: GenerateContentResponse = await model.generate_content_async(
  prompt,
  generation_config=generation_config,
  safety_settings=safety_settings,
  )

+ # 1. Check for explicit blocks (even with BLOCK_NONE, core harms might be blocked)
  if not response.candidates:
  block_reason = "Unknown"
+ safety_ratings_str = "N/A"
  if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
  block_reason = response.prompt_feedback.block_reason or "Reason not specified"
+ if response.prompt_feedback.safety_ratings:
+ safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in response.prompt_feedback.safety_ratings])
+ error_msg = f"Error: Gemini response blocked or empty (Prompt Feedback). Reason: {block_reason}. Safety: {safety_ratings_str}"
  logger.error(f"[Gemini Primary] {error_msg}")
  return None, error_msg

+ # 2. Check candidate's finish reason and safety ratings
+ candidate = response.candidates[0]
+ finish_reason_val = candidate.finish_reason
+ finish_reason_str = str(finish_reason_val).upper() # Convert to upper string for reliable compare
+ logger.debug(f"[Gemini Primary] Received response. Finish reason value: {finish_reason_val} -> {finish_reason_str}")
+
+ # Check for safety issues in the *candidate* itself
+ candidate_safety_ratings_str = "N/A"
+ if hasattr(candidate, 'safety_ratings') and candidate.safety_ratings:
+ candidate_safety_ratings_str = ', '.join([f"{r.category.name}: {r.probability.name}" for r in candidate.safety_ratings])
+ # Check if any category was blocked, even with BLOCK_NONE (might indicate core harm)
+ # This logic might be too strict if BLOCK_NONE truly means nothing is blocked by threshold.
+ # Let's rely on finish_reason == SAFETY first.
+ # if any(r.probability.name in ["MEDIUM", "HIGH"] for r in candidate.safety_ratings):
+ # logger.warning(f"[Gemini Primary] Candidate has medium/high safety rating despite BLOCK_NONE. Ratings: {candidate_safety_ratings_str}")
+
+ # *** MODIFIED: Refined Finish Reason Check ***
+ # Reasons indicating success or acceptable completion:
+ success_reasons = ["STOP", "MAX_TOKENS"]
+ # Reasons indicating definite failure:
+ failure_reasons = ["SAFETY", "RECITATION", "OTHER"] # Treat OTHER as failure
+
+ if finish_reason_str in success_reasons:
+ # Proceed to extract text
+ logger.info(f"[Gemini Primary] Generation finished successfully or reached token limit. Reason: {finish_reason_str}")
+ if finish_reason_str == "MAX_TOKENS":
+ logger.warning("[Gemini Primary] Output may be truncated due to max_tokens limit.")
+ # Continue below to extract text...
+
+ elif finish_reason_str in failure_reasons:
+ error_msg = f"Error: Gemini generation failed. Reason: {finish_reason_str}. Safety: {candidate_safety_ratings_str}"
+ logger.error(f"[Gemini Primary] {error_msg}")
+ return None, error_msg # Return specific error

+ else:
+ # Unknown or unexpected finish reason
+ error_msg = f"Error: Gemini generation finished with unexpected reason: {finish_reason_str}. Safety: {candidate_safety_ratings_str}"
+ logger.error(f"[Gemini Primary] {error_msg}")
+ return None, error_msg # Return error

+ # 3. Extract text if finish reason was acceptable
  summary_text = ""
  try:
+ summary_text = response.text # Preferred way
  except ValueError as e:
  logger.warning(f"[Gemini Primary] Error accessing response.text: {e}. Trying parts manually.")
+ if candidate.content and candidate.content.parts:
+ summary_text = "".join(part.text for part in candidate.content.parts if hasattr(part, "text"))

  if not summary_text or not summary_text.strip():
+ # If text is empty even after successful finish reason, log warning and return error
+ logger.warning(f"[Gemini Primary] Gemini returned empty summary despite finish reason '{finish_reason_str}'.")
  return None, "Error: AI generated an empty summary."

+ logger.info(f"[Gemini Primary] Summary extracted successfully (len: {len(summary_text)}).")
  return summary_text.strip(), None

  except AttributeError as e:
  logger.error(f"[Gemini Primary] Attribute error accessing Gemini response: {e}. Response structure might have changed.", exc_info=True)
  return None, f"Error: Failed to parse Gemini response. Details: {e}"
  except Exception as e:
  logger.error(f"[Gemini Primary] Error during API call to {GEMINI_MODEL}: {e}", exc_info=True)
  return None, f"Error: Failed to communicate with the primary AI service (Gemini). Details: {e}"
  async def _call_openrouter(text: str, summary_type: str) -> Tuple[Optional[str], Optional[str]]:
  """ Calls the OpenRouter API to generate a summary. """
+ # ... (Keep existing implementation) ...
  global OPENROUTER_API_KEY, OPENROUTER_MODEL, _openrouter_fallback_enabled
  if not _openrouter_fallback_enabled:
  logger.error("[OpenRouter Fallback] Called but is disabled.");
  return None, "Error: Fallback AI service (OpenRouter) not configured/available."

+ max_input_len_openrouter = 100000
  if len(text) > max_input_len_openrouter:
  logger.warning(f"[OpenRouter Fallback] Input text length ({len(text)}) exceeds approx limit ({max_input_len_openrouter}) for {OPENROUTER_MODEL}. Truncating.")
  text = text[:max_input_len_openrouter]

  logger.info(f"[OpenRouter Fallback] Generating {summary_type} summary using {OPENROUTER_MODEL}. Input length: {len(text)}")

+ if summary_type == "paragraph": prompt_content = f"Please summarise... {text} ..." # Keep prompts
+ elif summary_type == "points": prompt_content = f"Please summarise... {text} ..."
+ else: logger.error(f"[OpenRouter Fallback] Invalid summary_type: {summary_type}"); return None, f"Error: Invalid summary type '{summary_type}' specified."

+ headers = { "Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json", "HTTP-Referer": "...", "X-Title": "..." }
+ payload = { "model": OPENROUTER_MODEL, "messages": [{"role": "system", "content": "..."}, {"role": "user", "content": prompt_content}], "max_tokens": 2048, "temperature": 0.7 }
  api_url = "https://openrouter.ai/api/v1/chat/completions"

  try:
+ async with httpx.AsyncClient(timeout=120.0) as client:
  logger.debug(f"[OpenRouter Fallback] Sending request to {api_url} for model {OPENROUTER_MODEL}")
  response = await client.post(api_url, headers=headers, json=payload)
  logger.debug(f"[OpenRouter Fallback] Received status code {response.status_code}")

  if data.get("choices") and len(data["choices"]) > 0:
  choice = data["choices"][0]
  message = choice.get("message")
+ finish_reason = choice.get("finish_reason", "N/A")
  if message and message.get("content"):
  summary_text = message["content"].strip()
  if summary_text:
  logger.info(f"[OpenRouter Fallback] Summary generated successfully (len: {len(summary_text)}). Finish: {finish_reason}")
+ if finish_reason == 'length': logger.warning("[OpenRouter Fallback] Summary may be truncated due to max_tokens limit.")
  return summary_text, None
+ else: logger.warning("[OpenRouter Fallback] OpenRouter returned an empty summary content."); return None, "Error: Fallback AI generated an empty summary."
+ else: logger.error(f"[OpenRouter Fallback] Invalid response structure (missing message/content). Data: {data}"); return None, "Error: Fallback AI returned an invalid response format."
  else:
  logger.error(f"[OpenRouter Fallback] Invalid response structure (missing choices). Data: {data}")
+ api_error = data.get("error", {}).get("message", "Unknown API error"); return None, f"Error: Fallback AI response missing summary. API msg: {api_error}"
+ except json.JSONDecodeError: logger.error(f"[OpenRouter Fallback] Failed to decode JSON response. Status: {response.status_code}, Text: {response.text[:500]}"); return None, "Error: Fallback AI sent an invalid JSON response."
+ except Exception as e: logger.error(f"[OpenRouter Fallback] Error processing success response: {e}", exc_info=True); return None, f"Error: Failed to process Fallback AI response. Details: {e}"
  else:
  error_message = f"Error: Fallback AI service ({OPENROUTER_MODEL}) returned status {response.status_code}."
+ try: error_details = response.json().get("error", {}).get("message", response.text[:200]); error_message += f" Details: {error_details}"
+ except Exception: error_message += f" Response: {response.text[:200]}"
+ logger.error(f"[OpenRouter Fallback] {error_message}"); return None, error_message
+ except httpx.TimeoutException: logger.error(f"[OpenRouter Fallback] Timeout connecting to OpenRouter API for {OPENROUTER_MODEL}"); return None, "Error: Timed out connecting to the fallback AI service."
+ except httpx.RequestError as e: logger.error(f"[OpenRouter Fallback] Request error connecting to OpenRouter API: {e}"); return None, f"Error: Network error connecting to the fallback AI service. Details: {e}"
+ except Exception as e: logger.error(f"[OpenRouter Fallback] Unexpected error during OpenRouter API call: {e}", exc_info=True); return None, f"Error: Unexpected issue with the fallback AI service. Details: {e}"
  async def generate_summary(text: str, summary_type: str) -> str:
  """ Generates a summary using the primary AI (Gemini) and falling back to OpenRouter. """
+ # ... (Keep existing implementation) ...
  global _gemini_primary_enabled, _openrouter_fallback_enabled, GEMINI_MODEL, OPENROUTER_MODEL
  logger.info(f"[Summary Generation] Starting process. Primary: Gemini ({GEMINI_MODEL}), Fallback: OpenRouter ({OPENROUTER_MODEL})")
+ error_message: Optional[str] = None
  if _gemini_primary_enabled:
  logger.info(f"[Summary Generation] Attempting primary AI: Gemini ({GEMINI_MODEL})")
  primary_summary, primary_error = await _call_gemini(text, summary_type)
  if primary_summary:
  logger.info(f"[Summary Generation] Success with primary AI (Gemini).")
+ return primary_summary
  else:
  logger.warning(f"[Summary Generation] Primary AI (Gemini) failed. Error: {primary_error}. Proceeding to fallback.")
+ error_message = f"Primary AI ({GEMINI_MODEL}) failed: {primary_error}"
  else:
  logger.warning("[Summary Generation] Primary AI (Gemini) disabled. Proceeding to fallback.")
  error_message = "Primary AI (Gemini) unavailable."

  if _openrouter_fallback_enabled:
  logger.info(f"[Summary Generation] Attempting fallback AI: OpenRouter ({OPENROUTER_MODEL})")
  fallback_summary, fallback_error = await _call_openrouter(text, summary_type)
  if fallback_summary:
  logger.info(f"[Summary Generation] Success with fallback AI (OpenRouter).")
+ return fallback_summary
  else:
  logger.error(f"[Summary Generation] Fallback AI (OpenRouter) also failed. Error: {fallback_error}")
+ if error_message: return f"{error_message}\nFallback AI ({OPENROUTER_MODEL}) also failed: {fallback_error}"
+ else: return f"Fallback AI ({OPENROUTER_MODEL}) failed: {fallback_error}"
  else:
  logger.error("[Summary Generation] Fallback AI (OpenRouter) is disabled. Cannot proceed.")
+ if error_message: return f"{error_message}\nFallback AI is also unavailable."
+ else: return "Error: Both primary and fallback AI services are unavailable."

  logger.error("[Summary Generation] Reached end of function unexpectedly. No summary generated.")
  final_error = error_message or "Unknown summary generation error."
  return f"Sorry, an error occurred: {final_error}"
 
  bot_token: str
  ) -> None:
  """Handles fetching content, generating summary, and sending results."""
+ # ... (Keep existing implementation) ...
  task_id = f"{user_id}-{message_id_to_edit or 'new'}"
  logger.info(f"[Task {task_id}] Starting processing for URL: {url}")
  background_request: Optional[BaseRequest] = None

  user_feedback_message: Optional[str] = None
  success = False
  status_message_id = message_id_to_edit # Keep track of the original button message ID

  try:
  background_request = HTTPXRequest(connect_timeout=15.0, read_timeout=60.0, write_timeout=60.0, pool_timeout=60.0)
  bot = Bot(token=bot_token, request=background_request)
  except Exception as e:
+ logger.critical(f"[Task {task_id}] Failed to create background bot: {e}", exc_info=True); return

  try:
+ # Edit original button message to "Processing..."
  processing_message_text = f"Got it! Generating '{summary_type}' summary for:\n{html.escape(url)}\n\nThis might take a moment..."
+ if status_message_id:
  try:
+ await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=processing_message_text, parse_mode=ParseMode.HTML, reply_markup=None, link_preview_options={'is_disabled': True} )
  logger.debug(f"[Task {task_id}] Edited original button message {status_message_id} to 'Processing'")
  except Exception as e:
+ logger.warning(f"[Task {task_id}] Could not edit original button message {status_message_id}: {e}.")

  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
+ except Exception: pass

  # --- Determine URL Type and Fetch Content ---
  is_youtube = is_youtube_url(url)

  if is_youtube:
  video_id = extract_youtube_id(url)
+ if video_id: content = await get_youtube_transcript(video_id, url)
+ else: user_feedback_message = "Sorry, I couldn't understand that YouTube URL format."
+ if not content and not user_feedback_message: user_feedback_message = "Sorry, I couldn't get the transcript for that YouTube video..."
  else:
+ # Website URL - Try methods in order
  logger.info(f"[Task {task_id}] URL is website. Attempting Crawl4AI (Primary)...")
  content = await get_website_content_via_crawl4ai(url)
  if not content:
+ logger.warning(f"[Task {task_id}] Crawl4AI failed. Attempting BeautifulSoup (Fallback 1)...")
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
  except Exception: pass
  content = await get_website_content_bs4(url)
  if not content:
+ logger.warning(f"[Task {task_id}] BeautifulSoup failed. Attempting API (Fallback 2)...")
  global URLTOTEXT_API_KEY, _urltotext_fallback_enabled
  if _urltotext_fallback_enabled:
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
  except Exception: pass
  content = await get_website_content_via_api(url, URLTOTEXT_API_KEY)
+ if not content: logger.error(f"[Task {task_id}] API fallback (urltotext) also failed."); user_feedback_message = "Sorry, couldn't fetch content (Crawl4AI/BS4 failed, API failed/credits?)."
+ else: logger.warning(f"[Task {task_id}] API fallback disabled."); user_feedback_message = "Sorry, couldn't fetch content (Crawl4AI/BS4 failed, API disabled)."
+ if not content and not user_feedback_message: user_feedback_message = "Sorry, couldn't fetch content using any method."

  # --- Generate Summary if Content was Fetched ---
  logger.info(f"[Task {task_id}] Content fetched (len:{len(content)}). Generating summary type: '{summary_type}'.")
  try: await retry_bot_operation(bot.send_chat_action, chat_id=chat_id, action='typing')
  except Exception: pass
+ final_summary = await generate_summary(content, summary_type)

  if final_summary.startswith("Error:") or final_summary.startswith("Sorry,"):
+ user_feedback_message = final_summary
  logger.warning(f"[Task {task_id}] Summary generation failed: {final_summary}")
+ success = False
  else:
+ # Success - Split and Send
  summary_parts = []
+ current_part = ""; lines = final_summary.splitlines(keepends=True)
  for line in lines:
  if len(current_part) + len(line) > MAX_SUMMARY_CHUNK_SIZE:
+ if current_part.strip(): summary_parts.append(current_part.strip())
+ current_part = line[:MAX_SUMMARY_CHUNK_SIZE] if len(line) > MAX_SUMMARY_CHUNK_SIZE else line
+ else: current_part += line
  if current_part.strip(): summary_parts.append(current_part.strip())
+ if not summary_parts: summary_parts.append("Summary generated, but empty."); logger.warning(...)

  logger.info(f"[Task {task_id}] Summary generated (orig len: {len(final_summary)}). Sending in {len(summary_parts)} part(s).")

+ # Edit original message with first part
  edited_final_result = False
+ if status_message_id:
  try:
+ await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
+ logger.debug(f"[Task {task_id}] Edited original message {status_message_id} with first summary part.")
  edited_final_result = True
+ except Exception as edit_err: logger.warning(f"[Task {task_id}] Failed edit original message {status_message_id} with summary part 1: {edit_err}. Sending new.")
  if not edited_final_result:
+ sent_msg = await retry_bot_operation( bot.send_message, chat_id=chat_id, text=summary_parts[0], parse_mode=None, link_preview_options={'is_disabled': True} )
+ if not sent_msg: user_feedback_message = "Sorry, failed to send summary."; success = False; logger.error(...)
+
+ # Send remaining parts if needed and success still true
+ if success and len(summary_parts) > 1:
  for i, part in enumerate(summary_parts[1:], start=2):
+ await asyncio.sleep(0.5)
  try:
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=part, parse_mode=None, link_preview_options={'is_disabled': True} )
  logger.debug(f"[Task {task_id}] Sent summary part {i}/{len(summary_parts)}.")
+ except Exception as part_err: user_feedback_message = f"Sorry, failed to send part {i}."; success = False; logger.error(...); break
+ if not user_feedback_message: success = True # Confirm success if no errors occurred
+
+ # --- Handle Failure Cases ---
+ if not success:
+ if not user_feedback_message: user_feedback_message = "Sorry, unknown error."; logger.error(...)
+ logger.warning(f"[Task {task_id}] Sending failure feedback: {user_feedback_message}")
  try:
+ # Edit original message with error
  edited_final_error = False
  if status_message_id:
  try:
+ await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=user_feedback_message, link_preview_options={'is_disabled': True}, reply_markup=None )
+ logger.debug(f"[Task {task_id}] Edited original message {status_message_id} with failure feedback.")
  edited_final_error = True
+ except Exception as edit_err: logger.warning(f"[Task {task_id}] Failed edit original message {status_message_id} with error: {edit_err}. Sending new.")
  if not edited_final_error:
+ await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message, link_preview_options={'is_disabled': True} )
  logger.debug(f"[Task {task_id}] Sent failure feedback as new message.")
+ except Exception as send_err: logger.error(f"[Task {task_id}] Failed even to send failure feedback: {send_err}")

  except Exception as e:
+ # Catch-all for unexpected errors in task
  logger.error(f"[Task {task_id}] Unexpected error during processing task: {e}", exc_info=True)
+ success = False; user_feedback_message = "Oops! Something went wrong..."
+ if bot:
+ try: # Try sending final crash error
+ edited_crash = False
  if status_message_id:
+ try: await retry_bot_operation( bot.edit_message_text, chat_id=chat_id, message_id=status_message_id, text=user_feedback_message, reply_markup=None, link_preview_options={'is_disabled': True} ); edited_crash = True
+ except Exception: pass
+ if not edited_crash: await retry_bot_operation( bot.send_message, chat_id=chat_id, text=user_feedback_message )
+ except Exception as final_err: logger.error(f"[Task {task_id}] Failed to send final crash error feedback: {final_err}")

  finally:
+ # Close background bot client
  if background_request and hasattr(background_request, '_client') and background_request._client:
+ try: await background_request._client.aclose(); logger.debug(f"[Task {task_id}] Background bot client closed.")
+ except Exception as close_err: logger.warning(f"[Task {task_id}] Error closing background bot client: {close_err}")
  logger.info(f"[Task {task_id}] Task completed. Success: {success}")
  # --- Telegram Handlers ---
  # (start, help_command, handle_potential_url, handle_summary_type_callback, error_handler)
+ # No changes needed in these handlers.
  async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+ # ... (Keep existing implementation) ...
  user = update.effective_user; mention = user.mention_html()
  if not user or not update.message: return
  logger.info(f"User {user.id} ({user.username or 'N/A'}) used /start.")
  await update.message.reply_html( f"👋 Hello {mention}! I can summarise YouTube links or website URLs.\n\nJust send me a link anytime!" )

  async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+ # ... (Keep existing implementation) ...
  user = update.effective_user
  if not user or not update.message: return
  logger.info(f"User {user.id} ({user.username or 'N/A'}) used /help.")

  "1. Send me any YouTube video link or website URL.\n"
  "2. I'll ask how you want it summarised (paragraph or points).\n"
  "3. Click the button for your choice.\n"
+ "4. Wait while I process it!\n\n"
  "⚙️ **Behind the scenes:**\n"
+ f"• **Websites:** I try `Crawl4AI` (smart crawl, ignores robots.txt), then `BeautifulSoup` (basic scrape), and `urltotext.com` API (if configured & credits available).\n" # Mention ignore_robots
  "• **YouTube:** I use `youtube-transcript-api` first, then fall back to `Supadata` and `Apify` APIs if needed.\n"
  f"• **Summaries:** Generated using Google `{GEMINI_MODEL}` (primary) or `{OPENROUTER_MODEL}` (fallback, if configured).\n\n"
  "**Commands:**\n"

  await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)

  async def handle_potential_url(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+ # ... (Keep existing implementation) ...
  if not update.message or not update.message.text: return
  url = update.message.text.strip(); user = update.effective_user
  if not user: return
+ url_pattern = re.compile(r'https?://[^\s/$.?#].[^\s]*', re.IGNORECASE)
  if not url_pattern.match(url):
  logger.debug(f"Ignoring non-URL from {user.id}: {url}")
+ await update.message.reply_text("Hmm, that doesn't look like a valid web URL...", parse_mode=ParseMode.MARKDOWN); return
  logger.info(f"User {user.id} ({user.username or 'N/A'}) sent potential URL: {url}")
  context.user_data['url_to_summarize'] = url
  context.user_data['original_message_id'] = update.message.message_id
  keyboard = [[ InlineKeyboardButton("Paragraph Summary", callback_data="paragraph"), InlineKeyboardButton("Points Summary", callback_data="points") ]]
  reply_markup = InlineKeyboardMarkup(keyboard)
+ escaped_url = html.escape(url)
  await update.message.reply_html( f"Okay, I see this link:\n<code>{escaped_url}</code>\n\nHow would you like it summarised?", reply_markup=reply_markup, disable_web_page_preview=True )
  async def handle_summary_type_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+ # ... (Keep existing implementation) ...
  query = update.callback_query
  if not query or not query.message or not query.from_user: logger.warning("Callback query missing data."); return
  user = query.from_user; summary_type = query.data; query_id = query.id
+ try: await query.answer(); logger.debug(f"Ack callback {query_id} from {user.id}")
+ except Exception as e: logger.warning(f"Error answering callback {query_id}: {e}")

  url = context.user_data.get('url_to_summarize')
+ message_id_to_edit = query.message.message_id
+ logger.info(f"User {user.id} chose '{summary_type}' for msg {message_id_to_edit}. URL in context: {'Yes' if url else 'No'}")

  if not url:
  logger.warning(f"No URL in context for user {user.id} (cb {query_id}). Expired?")
+ try: await query.edit_message_text(text="Sorry, context lost. Please send URL again.", reply_markup=None)
  except Exception as e: logger.error(f"Failed edit 'URL not found' msg: {e}")
+ return

  global TELEGRAM_TOKEN, _gemini_primary_enabled, _openrouter_fallback_enabled
+ if not TELEGRAM_TOKEN:
+ logger.critical("TG TOKEN missing!")
+ try: await query.edit_message_text("❌ Bot config error.", reply_markup=None)
+ except Exception: pass
+ return
+ if not _gemini_primary_enabled and not _openrouter_fallback_enabled:
+ logger.critical("No AI models available!")
+ try: await query.edit_message_text("❌ AI config error.", reply_markup=None)
+ except Exception: pass
+ return
+ elif not _gemini_primary_enabled: logger.warning("Primary AI unavailable, using fallback.")
+ elif not _openrouter_fallback_enabled: logger.warning("Fallback AI unavailable, using primary.")

  logger.info(f"Scheduling task for user {user.id}, chat {query.message.chat_id}, msg {message_id_to_edit} (URL: {url})")
+ asyncio.ensure_future( process_summary_task( user_id=user.id, chat_id=query.message.chat_id, message_id_to_edit=message_id_to_edit, url=url, summary_type=summary_type, bot_token=TELEGRAM_TOKEN ) )
  context.user_data.pop('url_to_summarize', None)
  context.user_data.pop('original_message_id', None)
+ logger.debug(f"Cleared URL context for user {user.id} after scheduling.")

  async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
  # ... (Keep existing implementation) ...

  if not TELEGRAM_TOKEN: raise ValueError("TELEGRAM_TOKEN missing.")
  custom_request = HTTPXRequest( connect_timeout=10.0, read_timeout=30.0, write_timeout=30.0, pool_timeout=60.0 )
  application = ( Application.builder() .token(TELEGRAM_TOKEN) .request(custom_request) .build() )
  application.add_handler(CommandHandler("start", start))
  application.add_handler(CommandHandler("help", help_command))
  url_filter = filters.TEXT & ~filters.COMMAND & (filters.Entity("url") | filters.Entity("text_link") | filters.Regex(r'https?://[^\s]+'))

  logger.info("Telegram application handlers configured."); return application
 
  # --- ASGI Lifespan & Routes ---
+ # (lifespan, health_check, telegram_webhook) - No changes needed here
  @contextlib.asynccontextmanager
  async def lifespan(app: Starlette):
+ # ... (Keep existing implementation) ...
  global ptb_app, WEBHOOK_SECRET, TELEGRAM_TOKEN
  logger.info("ASGI Lifespan: Startup initiated...");
  if not TELEGRAM_TOKEN: logger.critical("TG TOKEN missing."); raise RuntimeError("Telegram token missing.")
  try:
  ptb_app = await setup_bot_config()
+ await ptb_app.initialize()
  bot_info = await ptb_app.bot.get_me()
  logger.info(f"Bot initialized: @{bot_info.username} (ID: {bot_info.id})")

+ # Webhook setup
  current_webhook_info = await ptb_app.bot.get_webhook_info()
+ webhook_delete_success = True
  if current_webhook_info and current_webhook_info.url:
  logger.info(f"Found existing webhook: {current_webhook_info.url}. Deleting...")
  try:

  else: logger.warning("Failed delete webhook (API returned False)."); webhook_delete_success = False
  except Exception as e: logger.warning(f"Could not delete webhook: {e}"); await asyncio.sleep(1); webhook_delete_success = False

  space_host = os.environ.get("SPACE_HOST")
+ webhook_path = "/webhook"
  full_webhook_url = None
  if space_host:
+ full_webhook_url = f"https://{space_host.split('://')[-1].rstrip('/')}{webhook_path}"
  logger.info(f"Using SPACE_HOST to determine webhook URL: {full_webhook_url}")
+ else: raise RuntimeError("Webhook URL undetermined (SPACE_HOST missing).")

  if full_webhook_url and webhook_delete_success:
  logger.info(f"Attempting to set webhook: {full_webhook_url}")
+ set_webhook_args = { "url": full_webhook_url, "allowed_updates": Update.ALL_TYPES, "drop_pending_updates": True }
+ if WEBHOOK_SECRET: set_webhook_args["secret_token"] = WEBHOOK_SECRET; logger.info("Webhook secret token will be used.")
+ await asyncio.sleep(1.0)
  try:
  webhook_set = await ptb_app.bot.set_webhook(**set_webhook_args)
+ if not webhook_set: raise RuntimeError("set_webhook API call returned False.")
+ await asyncio.sleep(1.5)
  webhook_info = await ptb_app.bot.get_webhook_info()
  if webhook_info and webhook_info.url == full_webhook_url:
  logger.info(f"Webhook set and verified successfully: URL='{webhook_info.url}', Secret={'YES' if WEBHOOK_SECRET else 'NO'}")
+ if webhook_info.last_error_message: logger.warning(f"Webhook status has last error: {webhook_info.last_error_message}")
+ else: raise RuntimeError(f"Webhook verification failed! Expected '{full_webhook_url}', Got info: {webhook_info}")
+ await ptb_app.start()
  logger.info("PTB Application started (webhook mode).")
+ except Exception as e: logger.error(f"FATAL: Failed to set or verify webhook: {e}", exc_info=True); raise RuntimeError(f"Failed to set/verify webhook: {e}") from e
+ elif not webhook_delete_success: raise RuntimeError("Failed to delete previous webhook.")

+ logger.info("ASGI Lifespan: Startup complete."); yield

  except Exception as startup_err:
  logger.critical(f"Application startup failed: {startup_err}", exc_info=True)
  if ptb_app:
  try:
  if ptb_app.running: await ptb_app.stop()
  if ptb_app._initialized: await ptb_app.shutdown()
+ except Exception as shutdown_err: logger.error(f"Error during shutdown after startup failure: {shutdown_err}")
+ raise

  finally:
  logger.info("ASGI Lifespan: Shutdown initiated...")
  if ptb_app:
  try:
+ if ptb_app.running: await ptb_app.stop(); logger.info("PTB stopped.")
+ if ptb_app._initialized: await ptb_app.shutdown(); logger.info("PTB shutdown.")
+ except Exception as e: logger.error(f"Error during PTB shutdown: {e}", exc_info=True)
  logger.info("ASGI Lifespan: Shutdown complete.")
  async def health_check(request: Request) -> PlainTextResponse:
+ # ... (Keep existing implementation) ...
  global OPENROUTER_MODEL, GEMINI_MODEL, APIFY_ACTOR_ID, _apify_token_exists, _gemini_primary_enabled, _openrouter_fallback_enabled, _crawl4ai_primary_web_enabled, _urltotext_fallback_enabled, SUPADATA_API_KEY
+ bot_status = "Not Initialized"; bot_username = "N/A"
+ if ptb_app and ptb_app.bot and ptb_app._initialized:
  try:
  wh_info = await ptb_app.bot.get_webhook_info()
  if ptb_app.running and wh_info and wh_info.url:
+ bot_info = await ptb_app.bot.get_me(); bot_username = f"@{bot_info.username}"
  bot_status = f"Running (Webhook OK, {bot_username})"
+ elif ptb_app.running: bot_status = f"Running (Webhook Status: {wh_info.url if wh_info else 'N/A'}, Last Error: {wh_info.last_error_message if wh_info else 'N/A'})"
  else: bot_status = "Initialized/Not running"
+ except Exception as e: logger.error(f"Error checking bot status: {e}", exc_info=True); bot_status = f"Error checking status: {e}"
+ elif ptb_app: bot_status = "Initializing..."

  health_info = [
+ f"=== Telegram Summary Bot Status ===", f"Bot Application: {bot_status}", "--- Services ---",
+ f"Primary Web Scraper: {'Crawl4AI (ignore_robots=True)' if _crawl4ai_primary_web_enabled else 'DISABLED (Lib Missing)'}", # Note ignore_robots
  f"Fallback Web Scraper 1: BeautifulSoup",
  f"Fallback Web Scraper 2: {'urltotext.com API' if _urltotext_fallback_enabled else 'DISABLED (No Key)'}",
  f"Primary Summarizer: {'Gemini (' + GEMINI_MODEL + ')' if _gemini_primary_enabled else 'DISABLED (No Key/Lib)'}",
  f"Fallback Summarizer: {'OpenRouter (' + OPENROUTER_MODEL + ')' if _openrouter_fallback_enabled else 'DISABLED (No Key)'}",
  f"Primary YT Transcript: youtube-transcript-api",
  f"Fallback YT Transcript 1: {'Supadata API' if SUPADATA_API_KEY else 'DISABLED (No Key)'}",
+ f"Fallback YT Transcript 2: {'Apify (' + APIFY_ACTOR_ID + ')' if _apify_token_exists else 'DISABLED (No Key)'}" ]
  return PlainTextResponse("\n".join(health_info))
 
 
  async def telegram_webhook(request: Request) -> Response:
  # ... (Keep existing implementation) ...
  global WEBHOOK_SECRET, ptb_app
+ if not ptb_app or not ptb_app._initialized or not ptb_app.running:
+ status = "Not Initialized" if not ptb_app else ("Not Running (Initializing)" if not ptb_app.running else "Not Running")
+ logger.error(f"Webhook received but PTB application {status}.")
+ return PlainTextResponse(f'Bot {status}', status_code=503)
+
  if WEBHOOK_SECRET:
  token_header = request.headers.get("X-Telegram-Bot-Api-Secret-Token")
+ if token_header != WEBHOOK_SECRET: logger.warning(f"Webhook invalid secret token."); return Response(content="Invalid secret token", status_code=403)

  try:
  update_data = await request.json()
  update = Update.de_json(data=update_data, bot=ptb_app.bot)
  logger.debug(f"Processing update_id: {update.update_id} via webhook")
  await ptb_app.process_update(update)
+ return Response(status_code=200)
+ except json.JSONDecodeError: logger.error("Webhook invalid JSON."); return PlainTextResponse('Bad Request: Invalid JSON', status_code=400)
+ except Exception as e: logger.error(f"Error processing webhook update: {e}", exc_info=True); return Response(status_code=200) # OK to TG
 
 
 
 
 
 
  # --- ASGI App Definition ---
+ app = Starlette( debug=False, lifespan=lifespan, routes=[ Route("/", endpoint=health_check, methods=["GET"]), Route("/webhook", endpoint=telegram_webhook, methods=["POST"]), ] )
  logger.info("Starlette ASGI application created with native routes.")


  # --- Development Runner ---
  if __name__ == '__main__':
+ # ... (Keep existing implementation) ...
  import uvicorn
  logger.warning("Running in development mode using Uvicorn directly - FOR LOCAL TESTING ONLY")
  log_level = os.environ.get("LOGGING_LEVEL", "info").lower()
  local_port = int(os.environ.get('PORT', 8080))
+ try: from dotenv import load_dotenv; load_dotenv(); logger.info("Loaded .env file.")
+ except ImportError: logger.info(".env not loaded.")
+ if not get_secret('TELEGRAM_TOKEN'): logger.critical("Local Dev: TELEGRAM_TOKEN missing.")
+ if not get_secret('GEMINI_API_KEY'): logger.error("Local Dev: GEMINI_API_KEY missing.")
+ uvicorn.run( "main:app", host='0.0.0.0', port=local_port, log_level=log_level, reload=True )